zoukankan      html  css  js  c++  java
  • perl xpath 根据a标签 查找属性为href的值

    [root@yyjk sbin]# 
    [root@yyjk sbin]# cat a2.pl 
    use LWP::UserAgent;
    use HTTP::Cookies;
    use HTTP::Headers;
    use HTTP::Response;
    use Encode;
    use JSON;
    use File::Temp qw/tempfile/;
    use HTML::TreeBuilder::XPath;
    use Encode;
    use HTML::TreeBuilder;
    use Data::Dumper;
    use HTML::TreeBuilder::XPath;
    use DBI;
    use Encode;
    use strict;
    use warnings;

    # Demo script: print the href attribute of every <a> element found in the
    # sample pages test.html and fh2.html.  For each anchor two lines are
    # written to STDOUT: the href value, then the number of href values found
    # for that anchor (1 when the attribute is present, 0 when it is missing).

    # HTTP client and cookie jar set-up.  Neither object is used by the link
    # extraction below; they are kept so the script's set-up stays as it was.
    # NOTE(review): verify_hostname => 0 turns off TLS hostname verification;
    # confirm this is intended before using $ua for real requests.
    my $ua = LWP::UserAgent->new( ssl_opts => { verify_hostname => 0 } );
    $ua->timeout(10);
    $ua->env_proxy;
    $ua->agent("Mozilla/8.0");
    my $cookie_jar = HTTP::Cookies->new(
        file           => 'lwp_cookies.txt',
        autosave       => 1,
        ignore_discard => 1,
    );

    # Each document gets its own tree: a TreeBuilder object is meant to parse
    # a single document, so feeding two files into one object is avoided.
    # The files are processed in the original order.
    for my $file ( 'test.html', 'fh2.html' ) {
        my $tree = HTML::TreeBuilder::XPath->new;

        # parse_file returns undef when the file cannot be opened; report it
        # and carry on with the next file instead of failing silently.
        if ( !$tree->parse_file($file) ) {
            warn "Cannot parse $file: $!\n";
            $tree->delete;
            next;
        }

        # Collect the category URLs: look up every <a> tag and read its href.
        for my $anchor ( $tree->find_by_tag_name('a') ) {

            # attr() yields undef for an anchor without href; drop it so the
            # count below is honest and no "uninitialized" warning is raised.
            my @hrefs = grep { defined } $anchor->attr('href');
            print @hrefs, "\n";
            print scalar(@hrefs), "\n";
        }

        # Release the tree explicitly, as recommended for HTML::Element trees.
        $tree->delete;
    }
    
    
    [root@yyjk sbin]# cat fh2.html 
    <div class="daohang-kuai">
            <div class="daohang-org"><span>风险管理部</span></div>
            <div class="daohang-links"><a href="http://990.2.200.17:7003/loan-web/4ALogon.jsp?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">信贷系统</a> <span >|</span><a href="http://9000.2.200.17:7003/loan-web/sqlogon.html?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">信贷系统(授权码)</a> <span >|</span><div class="clear"></div></div><div class="daohang-links"><a href="http://800.16.75.1:9001/datamanage/IM00000001_4a_signIn.do?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">外部数据管理平</a> <span >|</span><a href="/tailong/syslink/oaforward.jsp?idName=oics&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">非现场监测系统</a> <span >|</span><div class="clear"></div></div><div class="daohang-links"><a href="/tailong/syslink/oaforward.jsp?idName=tlorm&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">风险事件报送系统</a> <span >|</span><div class="clear"></div></div>
            </div>
                
                
                
                <div class="daohang-kuai">
                    <div class="daohang-org"><span>国际业务部</span></div>
                    <div class="daohang-links"><a href="http://800.3.248.1:7001/UtanWeb?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">国际结算系统</a> <span >|</span><div class="clear"></div></div>
                </div>
    [root@yyjk sbin]# perl a2.pl 
    http://999.2.200.17:7003/loan-web/4ALogon.jsp?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    http://999.2.200.17:7003/loan-web/sqlogon.html?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    http://999.16.75.1:9001/datamanage/IM00000001_4a_signIn.do?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    /tailong/syslink/oaforward.jsp?idName=oics&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    /tailong/syslink/oaforward.jsp?idName=tlorm&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    http://999.3.248.1:7001/UtanWeb?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    http://999.3.246.2:7001/newaml?tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org
    1
    /tailong/syslink/goAml.jsp?tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org
    1
    /tailong/syslink/oaforward.jsp?idName=crmis&tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org
    1
    http://999.3.200.16:7001/UtanWeb/index.jsp?tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org
    1
    http://990.2.200.17:7003/loan-web/4ALogon.jsp?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    http://9000.2.200.17:7003/loan-web/sqlogon.html?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    http://800.16.75.1:9001/datamanage/IM00000001_4a_signIn.do?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    /tailong/syslink/oaforward.jsp?idName=oics&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    /tailong/syslink/oaforward.jsp?idName=tlorm&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    http://800.3.248.1:7001/UtanWeb?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
  • 相关阅读:
    自学人工智能之数学篇,数学入门并不难
    2018-8-10-win10-uwp-使用资源在后台创建控件
    2019-9-2-win10-uwp-弹起键盘不隐藏界面元素
    2019-7-31-程序猿修养-日志应该如何写
    2018-11-19-WPF-在image控件用鼠标拖拽出矩形
    2019-8-31-C#-如何给-ValueTuple-返回值添加注释
    2019-11-12-浅谈-Windows-桌面端触摸架构演进
    2018-8-10-win10-uwp-打开文件管理器选择文件
    2018-8-10-win10-uwp-验证输入-自定义用户控件
    2019-8-31-dotnet-特性-DynamicallyInvokable-是用来做什么的
  • 原文地址:https://www.cnblogs.com/hzcya1995/p/13349026.html
Copyright © 2011-2022 走看看