#!/usr/bin/perl
# a2.pl - list the distinct class values carried by <li> elements in
# 0526.txt and, for each one, print the concatenated text of the matching
# <li> elements.
#
# This file was captured from a shell session (cat a2.pl; cat 0526.txt;
# perl a2.pl).  The sample input and the recorded output of that session are
# preserved in the POD section after __END__.
use strict;
use warnings;

use LWP::UserAgent;
use DBI;
use POSIX;
use Data::Dumper;
use HTML::TreeBuilder;
use HTML::TreeBuilder::XPath;

# HTTP client configured exactly as in the original script.  Nothing in the
# code visible here issues a request with it.
# NOTE(review): presumably kept for a later fetch step -- confirm before
# removing it (and the LWP/DBI/POSIX/Data::Dumper imports above).
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
$ua->agent("Mozilla/8.0");

my $input_file = '0526.txt';

my $tree = HTML::TreeBuilder::XPath->new;

# parse_file returns undef (with $! set) when the file cannot be opened;
# without this check the script would carry on with an empty tree.
$tree->parse_file($input_file)
    or die "Cannot parse $input_file: $!\n";

# Original author's note (translated): besides the articles of each
# category, @urlall also contains the articles from the "most read" ranking.
my @urlall;
my %seen;    # class value => times seen; replaces the deprecated `~~` lookup

for my $li ($tree->find_by_tag_name('li')) {
    my $class = $li->attr('class');

    # Skip <li> elements without a class; unlike a plain truth test this
    # still accepts a class that is literally "0".
    next unless defined $class && length $class;

    # The label is the literal text '$_', as shown in the recorded output.
    print "\$_ is $class\n";

    # Keep first-seen order while dropping duplicates.
    push @urlall, $class unless $seen{$class}++;
}

print @urlall, "\n";

for my $var (@urlall) {
    # Single-quoted pieces keep Perl from interpolating '@class' as an array.
    # Resulting query looks like: /html/body//li[@class="alcw4 alcw41"]
    my $url = '/html/body//li[@class=' . _xpath_literal($var) . ']';
    print "\$url is $url\n";

    my @total = $tree->findvalues($url);
    print @total, "\n";
}

# Release the parse tree explicitly, as HTML::TreeBuilder recommends.
$tree->delete;

# Render $text as an XPath 1.0 string literal.
#
# XPath 1.0 has no escape sequence inside string literals, so the quote
# character is chosen to avoid whatever $text contains; when $text contains
# both kinds of quote, the pieces are glued together with concat().
# Text without a double quote yields "text", matching the original script.
sub _xpath_literal {
    my ($text) = @_;

    return qq("$text") if index($text, '"') < 0;
    return qq('$text') if index($text, "'") < 0;

    # At least one '"' is present here, so split yields two or more pieces,
    # which satisfies concat()'s minimum of two arguments.
    my @pieces = map { qq("$_") } split /"/, $text, -1;
    return 'concat(' . join(q{, '"', }, @pieces) . ')';
}

__END__

=head1 NAME

a2.pl - print the text of each distinct C<< <li class="..."> >> group found
in F<0526.txt>

=head1 SAMPLE SESSION

Recorded on host jrhmpt01 in F</root/lwp/0526>.  The capture had collapsed
every line break to a single space; the line breaks below are reconstructed.

    jrhmpt01:/root/lwp/0526# cat 0526.txt
    <li class="alcw4 alcw41">
    <div class="ajjbfb txdbfb bfb100">100<span>%</span></div>
    <div class="ajjbfb txdbfb bfb100">200<span>%</span></div>
    </li>
    <li class="alcw4 alcw42">
    <div class="ajjbfb txdbfb bfb100">100<span>%</span></div>
    <div class="ajjbfb txdbfb bfb100">200<span>%</span></div>
    <div class="ajjbfb txdbfb bfb100">scan<span>huihui</span></div>
    </li>

    jrhmpt01:/root/lwp/0526# perl a2.pl
    $_ is alcw4 alcw41
    $_ is alcw4 alcw42
    alcw4 alcw41alcw4 alcw42
    $url is /html/body//li[@class="alcw4 alcw41"]
    100%200%
    $url is /html/body//li[@class="alcw4 alcw42"]
    100%200%scanhuihui

=cut