本文用 Web::Scraper 抓取新浪新闻页面中的标题和正文文本。
#!/usr/bin/perl -w
use strict;
use utf8;
use URI;
use Web::Scraper;
# Write decoded text to STDOUT as strict UTF-8. ':encoding(UTF-8)' validates
# the output, whereas the lax ':encoding(utf8)' layer passes through code
# points that are not legal UTF-8.
binmode( STDOUT, ':encoding(UTF-8)' );

# Article page to fetch and scrape.
my $url = "http://news.sina.com.cn/c/2010-06-04/203520413927.shtml";

# Scraper definition: for every element matching each CSS selector, take its
# text content ("TEXT") and collect it into an array (the "[]" suffix) stored
# under the given key of the result hash.
my $proce = scraper {
    process '.blkContainerSblk >h1',   "news[]"  => "TEXT";
    process '.blkContainerSblkCon >p', "texts[]" => "TEXT";
};

# Passing a URI object makes the scraper fetch the page itself before
# applying the rules above.
my $res = $proce->scrape( URI->new($url) );

# Print headlines first, then body paragraphs, one entry per line.
# The '|| []' fallback treats an absent key as an empty list instead of
# autovivifying it inside the result hash.
for my $field (qw(news texts)) {
    for my $result ( @{ $res->{$field} || [] } ) {
        print "$result\n";
    }
}