zoukankan      html  css  js  c++  java
  • perl 爬取 find_by_tag_name

    find_by_tag_name
    
     @elements = $h->find_by_tag_name('tag', ...);
     $first_match = $h->find_by_tag_name('tag', ...);
    
    
    在列表上下文中, 返回 $h 及其下所有具有任一指定 tag 名的元素列表;
    在标量上下文中, 返回第一个匹配的元素 (按前序遍历), 如果没有匹配则返回 undef。
    
    node2:/root/pachong/yylc#
    node2:/root/pachong/yylc#cat t500.html 
       <tr>
                        <th>项目名</th>
                        <th>年利率</th>
                        <th>期 限</th>
                        <th>起购金额</th>
                        <th>进度</th>
                        <th>项目状态</th>
                        <th>操作</th>
                    </tr>
                                    <tr>
                    	<td><div class="fresh"><p class="text-ellipsis-2"><i class="fresh-icon"></i><a href="/detail/31836-260-500-913-8627.htm">房易贷3518号</a></p></div></td>
                        <td>12.00 %</td>
                        <td>15天</td>
                        <td>10.00元</td>
                        <td>
                        	<div class="wyd-list-bar">
                        				                    	<div class="wyd-inner-bar-1" style="70.42%;"></div>
    		                                            </div>
                            	                        		<p class="wyd-list-txt">已融资<i>70.42%</i> | 剩余<span>266160.00</span>元</p>
                            	                    </td>
                        <td >
                        				            	投标中
    			                                </td>
                        <td>
    	                    <p class="pro-btn">
    	                    					            	<a href="/detail/31836-260-500-706-8408.htm" class="icon-n-sprite icon-n-3" id="Btn">投 标</a>
    				            	                    </p>
                        </td>
                    </tr>
      use strict;
      use warnings;
      use LWP::UserAgent;
      use POSIX;
      use HTML::TreeBuilder::XPath;
      use Encode;
      use HTML::TreeBuilder;

      # Print the leading cells of every <tr> found in t500.html, one table row
      # per output line, with each cell's text followed by "||".
      my $file      = "t500.html";
      my $max_cells = 6;    # only the first six columns are reported

      my $tree = HTML::TreeBuilder::XPath->new;

      # parse_file returns undef when the file cannot be opened; fail loudly
      # instead of silently printing nothing.
      defined $tree->parse_file($file)
          or die "Cannot parse $file: $!\n";

      foreach my $row ( $tree->find_by_tag_name("tr") ) {

          # content_list can return plain text strings as well as element
          # objects; only element objects support ->as_text.
          my @cells = grep { ref $_ } $row->content_list;
          next unless @cells;

          # Rows with fewer than $max_cells cells are printed as-is rather than
          # dying on an undefined element.
          splice @cells, $max_cells if @cells > $max_cells;

          print join( '', map { $_->as_text . "||" } @cells ), "\n";
      }

      # Release the parse tree explicitly once the rows have been printed.
      $tree->delete;
    
    node2:/root/pachong/yylc#perl t500.pl
    项目名||年利率||期 限||起购金额||进度||项目状态||
    房易贷3518号||12.00 %||15天||10.00元||已融资70.42% | 剩余266160.00元|| 投标中 ||
    


    
                                        
    
  • 相关阅读:
    JavaScript autoComplete 1.2
    Linux下安装配置git
    《SQL Server 2008从入门到精通》20180627
    《SQL必知必会》知识点汇总
    关于js的setTimeout和setInterval
    关于js的闭包
    web图片类型
    关于绑定事件
    js原型与继承
    关于js的mouseout
  • 原文地址:https://www.cnblogs.com/hzcya1995/p/13349800.html
Copyright © 2011-2022 走看看