zoukankan      html  css  js  c++  java
  • Perl 爬取同花顺数据

    # Scrape the table served at http://data.10jqka.com.cn/financial/yjyg/.
    #
    # What the script does, in order:
    #   1. Connects to the MySQL database `zjzc_vote` and switches the session
    #      to utf8.
    #   2. Downloads the page and stores the decoded HTML in data.html.
    #   3. Parses data.html, reads the text of span.text-value as the title and
    #      the largest numeric `page` attribute found on any <a> element as the
    #      page count.
    #   4. For every page number, prints the first 8 cells of each <tr> (header
    #      row skipped) to STDOUT and appends them, '|'-separated, to
    #      "<title>-<page>.txt".
    #
    # The script dies with a descriptive message when the database connection,
    # the HTTP request or any file operation fails.
    #
    # NOTE(review): the database handle is opened but never used for inserts.
    # NOTE(review): only the first page is ever downloaded; every "<title>-N.txt"
    # therefore receives the same rows. Fetching pages 2..N needs the site's
    # paging URL scheme, which is not visible here.
    use strict;
    use warnings;
    use utf8;
    use POSIX;
    use Data::Dumper;
    use DBI;
    use LWP::UserAgent;
    use HTML::TreeBuilder;
    use HTML::TreeBuilder::XPath;

    my $SOURCE_URL   = 'http://data.10jqka.com.cn/financial/yjyg/';
    my $CACHE_FILE   = 'data.html';
    my $COLUMN_COUNT = 8;

    # Return the text of one child of a <tr>. content_list() can hand back
    # element objects or plain text strings, and a short row has no entry at
    # all for the trailing columns, so all three cases are handled here.
    sub cell_text {
        my ($cell) = @_;
        return ''             unless defined $cell;
        return $cell->as_text if ref $cell;
        return $cell;
    }

    my $user   = "root";
    my $passwd = 'xxx';

    # $DBI::errstr carries the reason for a failed connect; the message text
    # itself is unchanged.
    my $dbh = DBI->connect(
        "dbi:mysql:database=zjzc_vote;host=14.5.5.57;port=3306",
        $user, $passwd
    ) or die "can't connect to  database " . $DBI::errstr;
    $dbh->do("SET NAMES utf8");

    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);
    $ua->env_proxy;
    $ua->agent("Mozilla/8.0");

    my $response = $ua->get($SOURCE_URL);
    die "GET $SOURCE_URL failed: " . $response->status_line
        unless $response->is_success;

    my $html = $response->decoded_content;
    die "could not decode the response body of $SOURCE_URL"
        unless defined $html;

    # Write the decoded page out as UTF-8 and close the handle *before*
    # parsing, so the parser never sees a partially flushed file.
    open(my $cache, '>:encoding(UTF-8)', $CACHE_FILE)
        or die "open data file failed:$!";
    print {$cache} $html;
    close($cache) or die "close $CACHE_FILE failed: $!";

    # parse_file() returns undef when the file cannot be opened.
    my $tree = HTML::TreeBuilder::XPath->new;
    $tree->parse_file($CACHE_FILE)
        or die "parse $CACHE_FILE failed: $!";

    my $title = $tree->findvalue('/html/body//span[@class="text-value"]');
    print "\$title is $title\n";

    # The pagination links carry the page number in a `page` attribute; the
    # largest one is the page count. Default to 1 so that a page without any
    # pagination links is still exported.
    my $max = 1;
    for my $link ($tree->find_by_tag_name('a')) {
        my $page = $link->attr('page');
        next unless defined $page && $page =~ /^\d+$/;
        $max = $page if $page > $max;
    }
    print "\$max is $max\n";

    # The rows do not change between iterations, so collect them once.
    my @rows = $tree->find_by_tag_name("tr");
    shift @rows;    # the first <tr> is the header row

    for my $page (1 .. $max) {
        my $out_file = "$title-$page.txt";

        # Append mode is kept, so re-running the script adds to existing files.
        open(my $out, '>>', $out_file)
            or die "open $out_file failed: $!";

        for my $row (@rows) {
            my @cells  = $row->content_list;
            my @fields = map { cell_text($cells[$_]) } 0 .. $COLUMN_COUNT - 1;

            print @fields, "\n";
            print {$out} join('|', @fields), "\n";
        }

        close($out) or die "close $out_file failed: $!";
    }

    # Release the parse tree and the database connection explicitly.
    $tree->delete;
    $dbh->disconnect;

  • 相关阅读:
    苹果信息推送服务(Apple Push Notification Service)使用总结
    Xcode 相关路径总结
    微信红包随机算法 OC
    Xcode真机测试could not find developer disk image解决方法
    字典转模型 重写初始化方法
    Xcode 写代码没有补全提示解决:删缓存及显示隐藏文件命令
    按位与、或、异或等运算方法
    OC语言@property @synthesize和id
    iOS开发—Quartz2D简单介绍
    iOS开发—CoreLocation定位服务
  • 原文地址:https://www.cnblogs.com/zhaoyangjian724/p/6200219.html
Copyright © 2011-2022 走看看