node2:/root/pachong/tongbanjie#cat test.pl
use LWP::UserAgent;
use POSIX;
use HTML::TreeBuilder::XPath;
use Encode;
use HTML::TreeBuilder;
use Data::Dumper;
use HTML::TreeBuilder::XPath;
use DBI;
use Encode;
# Enable strictures from this point on so undeclared variables and stray
# barewords are reported instead of silently compiling.
use strict;
use warnings;

# Credentials for the local MySQL "licai" database.
my $user   = "root";
my $passwd = "1234567";

# Open the database connection. DBI->connect returns undef on failure and
# leaves the reason in $DBI::errstr, which is appended to the die message.
my $dbh = DBI->connect(
    "dbi:mysql:database=licai;host=127.0.0.1;port=3306",
    $user,
    $passwd,
) or die "can't connect to database " . $DBI::errstr;
$dbh->do("SET NAMES utf8");

# HTTP client: 10 second timeout, proxy settings taken from the environment,
# and a custom User-Agent string.
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
$ua->agent("Mozilla/8.0");

# Parse the saved page. parse_file returns undef (with $! set) when the file
# cannot be opened, so fail loudly instead of printing an empty result.
my $tree = HTML::TreeBuilder::XPath->new;
$tree->parse_file("test.html")
    or die "can't parse test.html: $!";

# Collect the text of every <a> that is a direct child of the
# div.page-control element. Anchors nested deeper (for example inside
# <span class="page-next">) are not selected by the trailing "/a" step.
my @pageString = $tree->findvalues('/html/body//div[@class="page-control"]/a');

# \@ keeps the label literal; the second @pageString interpolates the values
# joined by spaces.
print "\@pageString is @pageString\n";
node2:/root/pachong/tongbanjie#cat test.html
<div id="pager_COM0_20" class="pager">
<div class="page-control">
<a href="javascript:void(0)" class="page-button page-cur-button">1</a>
<a href="javascript:void(0);" data-curPage="2" data-totalPageCount="1091" class="page-button">2</a>
<a href="javascript:void(0);" data-curPage="3" data-totalPageCount="1091" class="page-button">3</a>
...
<a href="javascript:void(0)" class="page-button" data-totalPageCount="1091" data-curPage="1091">1091</a>
<span class="page-next"><a href="javascript:void(0)"data-totalPageCount="1091" data-curPage="2">下一页</a></span>
</div>
node2:/root/pachong/tongbanjie#perl test.pl
@pageString is 1 2 3 1091