# Scrape the earnings-forecast ("yjyg") listing of data.10jqka.com.cn.
#
# For every reporting date in @report_dates the script
#   1. downloads page 1 of the listing and reads the highest page number
#      advertised by the `page` attribute of the <a> elements,
#   2. downloads every page 1..max,
#   3. appends the text of the first eight cells of each <tr> to
#      "<date>-<page>.txt", cells joined with "|", and echoes the same cells
#      to STDOUT.
#
# Every download is also left on disk as data.html (raw snapshot) and
# ths.html (the copy that is actually parsed).

use strict;
use warnings;
use utf8;

use LWP::UserAgent;
use DBI;
use POSIX;
use Data::Dumper;
use File::Copy qw(copy);
use HTML::TreeBuilder;
use HTML::TreeBuilder::XPath;

my $BASE_URL      = 'http://data.10jqka.com.cn/financial/yjyg';
my $DOWNLOAD_FILE = 'data.html';    # wrapped response body as received
my $PARSE_FILE    = 'ths.html';     # copy of $DOWNLOAD_FILE handed to the parser
my $COLUMN_COUNT  = 8;              # cells taken from each table row
my $PAUSE_SECONDS = 5;              # delay between the probe and the page loop

# NOTE(review): every separator/terminator this script emits is a single
# space, including the one after each output record. That looks like "\n"
# flattened somewhere along the way -- TODO confirm before changing, since it
# alters the output format.
my $TERMINATOR = " ";

my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
$ua->agent("Mozilla/8.0");

# Example requests kept for reference:
#my $response = $ua->get('http://data.10jqka.com.cn/financial/yjyg/date/2016-12-31/board/ALL/field/enddate/order/desc/page/1/ajax/1/');
#my $response = $ua->get('http://data.10jqka.com.cn/financial/yjyg/');

my @report_dates = (
    '2016-12-31', '2016-03-31', '2015-12-31', '2015-09-30', '2015-06-30',
    '2015-03-31', '2014-12-31', '2014-09-30', '2014-03-31',
);

for my $date (@report_dates) {
    print "$date is $date$TERMINATOR";

    my $first_url = page_url($date, 1);
    print "$first_url is $first_url$TERMINATOR";

    # Probe page 1 only to learn how many pages exist for this date.
    my $probe = fetch_tree($ua, $first_url);

    my $title = $date;
    #my $title= $tree->findvalue('/html/body//span[@class="text-value"]');
    print "$title is $title$TERMINATOR";

    # "@urlall holds, besides the articles of each category, the articles of
    # the reading ranking."  NOTE(review): no @urlall exists in this script;
    # this translated comment looks stale.
    my @pages = $probe ? $probe->find_by_tag_name('a') : ();
    print "@pages is @pages$TERMINATOR";

    # A failed probe falls back to a single page so page 1 is still attempted.
    my $max = last_page_number(@pages);
    $probe->delete if $probe;
    print "$max is $max$TERMINATOR";

    sleep $PAUSE_SECONDS;

    for my $page (1 .. $max) {
        # Skip pages that could not be fetched instead of re-parsing the
        # previous snapshot and filing its rows under the wrong name.
        my $tree = fetch_tree($ua, page_url($date, $page)) or next;
        my @records = table_records($tree);
        $tree->delete;
        append_records("$title-$page.txt", @records);
    }
}

# Build the listing URL for one reporting date and one page number.
sub page_url {
    my ($date, $page) = @_;
    return "$BASE_URL/date/$date/board/ALL/field/enddate/order/desc/page/$page/ajax/1/";
}

# Download $url, snapshot it to $DOWNLOAD_FILE / $PARSE_FILE and parse it.
#
# Returns the parsed HTML::TreeBuilder::XPath tree (the caller must ->delete
# it), or nothing when the HTTP request fails (a warning is emitted).
# Dies when a snapshot file cannot be written, copied or parsed.
sub fetch_tree {
    my ($agent, $url) = @_;

    my $response = $agent->get($url);
    if (!$response->is_success) {
        warn "GET $url failed: ", $response->status_line, "\n";
        return;
    }

    my $html = $response->decoded_content;
    $html = '' unless defined $html;

    # The response is wrapped in <html>...</html> before it is stored.
    # decoded_content is character data, so an explicit layer avoids
    # "Wide character" warnings and makes the on-disk encoding deterministic.
    open my $out, '>:encoding(UTF-8)', $DOWNLOAD_FILE
        or die "open data file failed:$!";
    print {$out} "<html>", " ", $html, "</html>", " ";

    # The handle must be closed (flushed) before the file is copied;
    # otherwise the parser may see a truncated snapshot.
    close $out
        or die "close $DOWNLOAD_FILE failed: $!";

    copy($DOWNLOAD_FILE, $PARSE_FILE)
        or die "copy $DOWNLOAD_FILE to $PARSE_FILE failed: $!";

    my $tree = HTML::TreeBuilder::XPath->new;
    $tree->parse_file($PARSE_FILE)
        or die "parse $PARSE_FILE failed: $!";
    return $tree;
}

# Collect the page number of every pager link in the section: return the
# largest numeric `page` attribute among the given <a> elements, or 1 when
# none of them carries a positive one.
sub last_page_number {
    my @anchors = @_;

    my $max = 0;
    for my $anchor (@anchors) {
        my $page = $anchor->attr('page');
        next unless defined $page && $page =~ /\A\d+\z/;
        $max = $page if $page > $max;
    }
    return $max || 1;
}

# Return one array reference per <tr> of $tree, holding the text of the
# row's first $COLUMN_COUNT child elements. Rows with fewer child elements
# are skipped, because they cannot be mapped onto the output columns.
sub table_records {
    my ($tree) = @_;

    my @records;
    #shift @arr1;   (header rows are deliberately not dropped)
    for my $row ($tree->find_by_tag_name("tr")) {
        # content_list may also yield bare text nodes (plain strings).
        my @cells = grep { ref $_ } $row->content_list;
        next if @cells < $COLUMN_COUNT;
        push @records, [ map { $_->as_text } @cells[ 0 .. $COLUMN_COUNT - 1 ] ];
    }
    return @records;
}

# Echo every record to STDOUT (cells concatenated) and append it to $path
# (cells joined with "|"). No file is created when there are no records.
# Dies when $path cannot be opened or closed.
sub append_records {
    my ($path, @records) = @_;
    return unless @records;

    open my $out, '>>', $path
        or die "open $path failed: $!";
    for my $cells (@records) {
        print @{$cells}, $TERMINATOR;
        print {$out} join('|', @{$cells}), $TERMINATOR;
    }
    close $out
        or die "close $path failed: $!";
    return;
}