# Fetch the page at $SOURCE_URL, cache it as data.html, parse it with
# HTML::TreeBuilder::XPath, and dump the first eight cells of every table
# row (except the first <tr>) to STDOUT and to "<title>-<page>.txt" files,
# one file per page number advertised by the pager links.
#
# Dies when the database is unreachable, the HTTP request fails, or any
# file cannot be opened/closed.

use strict;
use warnings;
use utf8;

use LWP::UserAgent;
use DBI;
use POSIX;
use Data::Dumper;
use HTML::TreeBuilder;
use HTML::TreeBuilder::XPath;

my $SOURCE_URL   = 'http://data.10jqka.com.cn/financial/yjyg/';
my $CACHE_FILE   = 'data.html';
my $COLUMN_COUNT = 8;    # number of leading cells exported per row

# NOTE(review): credentials and host are hard-coded; consider moving them to
# a config file or the environment. The handle is not used anywhere in this
# script, but the connection is kept because it is an existing side effect
# (the script aborts when the database is unreachable).
my $user   = "root";
my $passwd = 'xxx';
my $dbh    = DBI->connect(
    "dbi:mysql:database=zjzc_vote;host=14.5.5.57;port=3306",
    $user, $passwd,
) or die "can't connect to database " . $DBI::errstr;
$dbh->do("SET NAMES utf8");

my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
$ua->agent("Mozilla/8.0");

# Fetch first, so a failed request neither truncates an existing cache file
# nor leaves the parser without a document.
my $response = $ua->get($SOURCE_URL);
die "fetch of $SOURCE_URL failed: " . $response->status_line . "\n"
    unless $response->is_success;

my $html = $response->decoded_content;
die "could not decode response body from $SOURCE_URL\n"
    unless defined $html;

# decoded_content yields characters, so write them through an explicit
# UTF-8 layer. `or` (not `||`) is required: `||` would bind to the file
# name and the failure would never be noticed.
open my $cache_fh, '>:encoding(UTF-8)', $CACHE_FILE
    or die "open data file failed:$!";
print {$cache_fh} $html;
# print $html;    # debugging aid: echo the fetched page

# The handle must be closed before parsing, otherwise buffered output may
# not have reached the file yet.
close $cache_fh
    or die "close data file failed:$!";

my $tree = HTML::TreeBuilder::XPath->new;
$tree->parse_file($CACHE_FILE)
    or die "parse of $CACHE_FILE failed:$!";

my $title = $tree->findvalue('/html/body//span[@class="text-value"]');
# NOTE(review): the trailing " " in the output strings below may originally
# have been a newline that was lost when the file was reformatted - confirm
# before changing; the strings are kept exactly as found.
print "$title is $title ";

# Legacy note (translated; refers to an @urlall variable that does not exist
# in this script): "@urlall contains not only the articles of every category
# but also the articles from the most-read ranking."
#
# Find the highest page number advertised by the `page` attribute of the
# <a> elements (translated: "get the URL of each page in the section").
my $max = 0;
for my $anchor ( $tree->find_by_tag_name('a') ) {
    my $page = $anchor->attr('page');
    next unless defined $page && $page =~ /\A\d+\z/;
    $max = $page if $page > $max;
}
print "$max is $max ";

# Extract the rows once: the document does not change between iterations of
# the page loop. The first <tr> is dropped, as in the original script.
my @rows = $tree->find_by_tag_name("tr");
shift @rows;

my @records;
for my $row (@rows) {
    my @cells = $row->content_list;

    # Rows with fewer cells than expected cannot be exported.
    next if @cells < $COLUMN_COUNT;

    push @records,
        [ map { _cell_text($_) } @cells[ 0 .. $COLUMN_COUNT - 1 ] ];
}

# NOTE(review): every page file receives the same rows because only the
# first page is ever fetched; fetching page $m is presumably what was
# intended - confirm how the site addresses individual pages.
for my $m ( 1 .. $max ) {
    last unless @records;    # no rows: create no output files

    my $out_file = "$title-$m.txt";
    open my $out_fh, '>>', $out_file
        or die "open $out_file failed:$!";

    for my $record (@records) {
        print @{$record}, " ";
        print {$out_fh} join( "|", @{$record} ) . " ";
    }

    close $out_fh
        or die "close $out_file failed:$!";
}

$tree->delete;
$dbh->disconnect;

# Return the text of one table-row child: element nodes are flattened with
# as_text, plain text nodes (non-references) are returned unchanged.
sub _cell_text {
    my ($node) = @_;
    return ref $node ? $node->as_text : $node;
}