zoukankan      html  css  js  c++  java
  • Jsoup解析网页html

    Jsoup解析网页html

    解析网页demo:

    利用Jsoup获取截图中的数据信息:

    html代码片段:

      1  <!-- 当前基金档案计算定投开户 start -->
      2             <div class="wrapper">
      3               <div class="wrapper_min">
      4                 <div class="merchandiseDetail">
      5                   <div class="fundDetail-header">
      6                     <div class="fundDetail-tit">
      7                       <div style="float: left">兴全社会责任混合
      8                         <span>(</span>
      9                         <span class="ui-num">340007</span></div>)</div>
     10                     <div class="fundDetail-tools">
     11                       <a class="jijinba" href="http://guba.eastmoney.com/list,of340007.html">基金吧</a>
     12                       <!-- 未自选 start -->
     13                       <a class="addSel" id="addSel" href="javascript:;" target="_self">加自选</a>
     14                       <!-- 未自选 end -->
     15                       <a class="addCom" id="addCom" href="javascript:;" target="_self" onclick="common.addCompare()">加对比</a>
     16                       <a class="addDownApp" href="http://fundact.eastmoney.com/app/">手机版天天基金下载</a></div>
     17                   </div>
     18                   <div class="fundDetail-main">
     19                     <!-- 档案 start -->
     20                     <div class="fundInfoItem">
     21                       <!--开放式基金收益率模块-->
     22                       <div class="dataOfFund">
     23                         <dl class="dataItem01">
     24                           <dt>
     25                             <p>
     26                               <span>
     27                                 <span class="sp01">净值估算</span></span>
     28                               <span id="gz_gztime">(17-12-20 15:00)</span>
     29                               <span class="infoTips">
     30                                 <span class="tipsBubble" style="display: none;">净值估算每个交易日9:30-15:00盘中实时更新(QDII基金为海外交易时段),是按照基金持仓、指数走势和基金过往业绩估算,估算数据并不代表真实净值,仅供参考,请以基金管理人披露净值为准。</span></span>
     31                             </p>
     32                           </dt>
     33                           <dd class="dataNums">
     34                             <dl class="floatleft">
     35                               <span class="ui-font-large ui-color-green ui-num" id="gz_gsz">3.7576</span></dl>
     36                             <dl id="gz_icon" class="gzdown"></dl>
     37                             <dl class="floatleft fundZdf">
     38                               <span class="ui-font-middle ui-color-green ui-num" id="gz_gszze">0.0594</span>
     39                               <span class="ui-font-middle ui-color-green ui-num" id="gz_gszzl">-1.56%</span></dl>
     40                           </dd>
     41                           <dd>
     42                             <span>近1月:</span>
     43                             <span class="ui-font-middle ui-color-green ui-num">-4.62%</span></dd>
     44                           <dd>
     45                             <span>近1年:</span>
     46                             <span class="ui-font-middle ui-color-red ui-num">44.20%</span></dd>
     47                         </dl>
     48                         <span class="dataOfFund-line"></span>
     49                         <dl class="dataItem02">
     50                           <dt>
     51                             <p>
     52                               <span class="ui-color-blue">
     53                                 <span class="sp01">
     54                                   <a href="http://fund.eastmoney.com/f10/jjjz_340007.html">单位净值</a></span>(</span>2017-12-19)</p>
     55                           </dt>
     56                           <dd class="dataNums">
     57                             <span class="ui-font-large ui-color-red ui-num">3.8170</span>
     58                             <span class="ui-font-middle ui-color-red ui-num">1.41%</span></dd>
     59                           <dd>
     60                             <span>近3月:</span>
     61                             <span class="ui-font-middle ui-color-red ui-num">13.47%</span></dd>
     62                           <dd>
     63                             <span>近3年:</span>
     64                             <span class="ui-font-middle ui-color-red ui-num">113.48%</span></dd>
     65                         </dl>
     66                         <span class="dataOfFund-line"></span>
     67                         <dl class="dataItem03">
     68                           <dt>
     69                             <p>
     70                               <span class="ui-color-blue">
     71                                 <span class="sp01">
     72                                   <a href="http://fund.eastmoney.com/f10/jjjz_340007.html">累计净值</a></span>
     73                               </span>
     74                             </p>
     75                           </dt>
     76                           <dd class="dataNums">
     77                             <span class="ui-font-large ui-color-red ui-num">4.0070</span></dd>
     78                           <dd>
     79                             <span>近6月:</span>
     80                             <span class="ui-font-middle ui-color-red ui-num">25.35%</span></dd>
     81                           <dd>
     82                             <span>成立来:</span>
     83                             <span class="ui-font-middle ui-color-red ui-num">332.92%</span></dd>
     84                         </dl>
     85                       </div>
     86                       <div class="infoOfFund">
     87                         <div class="infoOfFund-line"></div>
     88                         <table>
     89                           <tr>
     90                             <td>基金类型:
     91                               <a href="http://fund.eastmoney.com/HH_jzzzl.html#os_0;isall_0;ft_;pt_3">混合型</a>&nbsp;&nbsp;|&nbsp;&nbsp;中高风险</td>
     92                             <td>
     93                               <a href="http://fund.eastmoney.com/f10/gmbd_340007.html">基金规模</a>:76.83亿元(2017-09-30)</td>
     94                             <td>基金经理:
     95                               <a href="http://fund.eastmoney.com/f10/jjjl_340007.html">傅鹏博</a></td>
     96                           </tr>
     97                           <tr>
     98                             <td>
     99                               <span class="letterSpace01">成 立 日</span>:2008-04-30</td>
    100                             <td>
    101                               <span class="letterSpace01">管 理 人</span>
    102                               <a href="http://fund.eastmoney.com/company/80036742.html">兴全基金</a></td>
    103                             <td>
    104                               <a class="floatleft" href="http://fund.eastmoney.com/f10/jjpj_340007.html">基金评级</a>
    105                               <span class="floatleft"></span>
    106                               <div class="jjpj4"></div>
    107                             </td>
    108                           </tr>
    109                         </table>
    110                       </div>
    111                     </div>
    112                     <!-- 档案 end -->

    java实现代码:

    /** 
     * Project Name:wlpc 
     * File Name:XyzqTask.java 
     * Package Name:com.xyzq.wlpc.task 
     * Date:2017年12月20日下午1:48:16 
     * Copyright (c) 2017 All Rights Reserved. 
     * 
    */  
      import java.io.IOException;;import net.sf.json.JSONObject;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;/** 
     * ClassName:XyzqTask 
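     * Function: Fetches the fund page at the configured URL with Jsoup and prints selected fields to standard output.
     * Reason:   Demonstrates parsing a web page's HTML with Jsoup selectors.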
     * Date:     2017年12月20日 下午1:48:16 
     * @author   lizm  
     * @since    JDK 1.6 
     *       
     */
    public class XyzqTask extends BaseTask {
    
        private void getHtml(){
            String url = "";
            url = Pub.getPropertiesValue("wlpc", "wlpc.web.url");
            try {
                Document doc = Jsoup.connect(url).get();
                //class等于fundDetail-tit的div标签  
                Elements fundDetail_tit = doc.select("div.fundDetail-tit");  
                for (Element element : fundDetail_tit){
                    //特殊字符'(',使用 \( 或 [(] 
                    System.out.println("fundDetail_tit>>>>:"+element.text().split("\(")[0]);
                    //获取div下的第一个span的class为ui-num的值
                    Document elementDoc = Jsoup.parse(element.toString());
                    Element elm = elementDoc.select("span.ui-num").first();
                    System.out.println("elm>>>>:"+elm.text());
                }
                //id等于gz_gztime的span标签
                Elements gz_gztime = doc.select("span#gz_gztime");  
                for (Element element : gz_gztime){
                    System.out.println("gz_gztime>>>>:"+element.text().replace("(", "").replace(")", ""));
                }
                //id等于gz_gsz的span标签
                Elements gz_gsz = doc.select("span#gz_gsz");  
                for (Element element : gz_gsz){
                    System.out.println("gz_gsz>>>>:"+element.text());
                }
                //id等于gz_gszze的span标签
                Elements gz_gszze = doc.select("span#gz_gszze");  
                for (Element element : gz_gszze){
                    System.out.println("gz_gszze>>>>:"+element.text());
                }
                
                //id等于gz_gszzl的span标签
                Elements gz_gszzl = doc.select("span#gz_gszzl");  
                for (Element element : gz_gszzl){
                    System.out.println("gz_gszzl>>>>:"+element.text());
                }
                //class等于dataItem02的dl标签  
                Elements dataItem02 = doc.select("dl.dataItem02"); 
                for (Element element : dataItem02){
                    Document elementDoc = Jsoup.parse(element.toString());
                    Element elm1 = elementDoc.getElementsByTag("p").first();
                    System.out.println("elm>>>>:"+elm1.text().replace("单位净值 (", "").replace(")", ""));
                    Element elm_dd = elementDoc.select("dd.dataNums").first();
                    Document doc_dd = Jsoup.parse(elm_dd.toString());
                    Element elm_dd_span1 = doc_dd.getElementsByTag("span").first();
                    System.out.println("elm_dd_span1>>>:"+elm_dd_span1.text());
                    Element elm_dd_span2 = doc_dd.getElementsByTag("span").last();
                    System.out.println("elm_dd_span2>>>:"+elm_dd_span2.text());
                }
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
    
        public static void main(String[] args) {
            XyzqTask client = new XyzqTask();
            client.getHtml();
        }
    }
     

    输出结果:

    fundDetail_tit>>>>:兴全社会责任混合
    elm>>>>:340007
    gz_gztime>>>>:17-12-21 15:00
    gz_gsz>>>>:3.8583
    gz_gszze>>>>:+0.0933
    gz_gszzl>>>>:+2.48%
    elm>>>>:2017-12-20
    elm_dd_span1>>>:3.7650
    elm_dd_span2>>>:-1.36%
  • 相关阅读:
    jQuery 点击超链接生成一个的页面,点击几次,生成几个新页面
    Zookeeper基本信息
    性能测试基础
    Java安装及基础01
    手机抓包手册
    在linux环境下部署禅道环境
    Linux基础命令2
    Linux基础命令1
    SQL SERVER 基本操作语句
    MYSQL多表查询
  • 原文地址:https://www.cnblogs.com/lizm166/p/8080190.html
Copyright © 2011-2022 走看看