zoukankan      html  css  js  c++  java
  • jsoup-处理html中的script数据

    /**
     * Scrapes the category data shown in the left-hand panel of the ValueOnline laws page
     * (http://www.valueonline.cn/laws/laws?typeid=96219074211635284).
     *
     * <p>The data is read from the first {@code <script>} element of the page, which is split
     * on the text {@code "var"}; the JSON array found in one of the resulting segments is
     * printed to standard output as SQL {@code insert} statements.
     *
     * <p>NOTE(review): {@code JSONArray}/{@code JSONObject} look like json-lib and
     * {@code StringUtils} like Apache Commons Lang, but the imports are not part of this
     * listing — confirm before compiling.
     *
     * @author hwaggLee
     */
    public class UtilsHtmValueonLineType {

        /** Index of the segment (after splitting the script on "var") that holds the market types. */
        private static final int MARKET_TYPE_SEGMENT = 3;

        /**
         * Runs the scraper against the sample laws page and prints the generated SQL.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            String url = "http://www.valueonline.cn/laws/laws?typeid=96219074211635284";
            readHtml(url);
        }

        /**
         * Downloads {@code url}, extracts the market-type JSON array from the first script
         * element and prints one {@code insert into n3b_vl_market_type} statement per entry.
         *
         * <p>If the page cannot be fetched, or does not have the expected layout (no script
         * element, too few {@code var} segments, no {@code [...]} array), nothing is generated
         * and the method returns normally instead of throwing an index exception.
         *
         * @param url address of the page to scrape
         * @return a new list; this method never adds anything to it, so it is always empty
         *         (the generated statements are only printed)
         */
        public static List<Object> readHtml(String url) {
            List<Object> list = new ArrayList<Object>();

            Document doc = null;
            try {
                doc = Jsoup.connect(url).get();
            } catch (Exception e) {
                // Best effort: report the failing URL and fall through to the empty result.
                System.out.println(e.getMessage()+":--------------->"+url);
            }
            if (doc == null) {
                return list;
            }

            Elements elScripts = doc.getElementsByTag("script");
            if (elScripts.isEmpty()) {
                // Page layout changed: there is no script element to read the data from.
                return list;
            }

            // Splitting on the bare text "var" is fragile (it also matches inside longer
            // words), but it is what the segment indexes below are based on.
            String[] elScriptList = elScripts.get(0).data().split("var");
            if (elScriptList.length <= MARKET_TYPE_SEGMENT) {
                return list;
            }

            // Disabled: generation of the plate-type inserts from segment 2. Kept for
            // reference; here strTypeList would be elScriptList[2].
            /*strTypeList = strTypeList.substring(strTypeList.indexOf("["), strTypeList.lastIndexOf("]")+1);
            JSONArray array = JSONArray.fromObject(strTypeList);
            JSONArray arrayList = JSONArray.fromObject(array.get(0));
            for (Object o : arrayList) {
                JSONObject object = JSONObject.fromObject(o);
                StringBuilder sb = new StringBuilder();
                sb.append("insert into n3b_vl_plate_type values ");
                sb.append(" ( ");
                sb.append("'"+object.get("id")+"'");
                sb.append(",'"+object.get("parentId")+"'");
                sb.append(","+object.get("level")+"");
                sb.append(",'"+object.get("declareTypeName")+"'");
                sb.append(",'"+object.get("declareTypeNo")+"'");
                sb.append(",'"+object.get("validFlag")+"'");
                sb.append(","+object.get("oftenFlag")+"");
                sb.append(",'"+object.get("showTypeName")+"'");
                sb.append(" ); ");
                System.out.println(sb.toString());
            }*/

            String strTypeList = elScriptList[MARKET_TYPE_SEGMENT];
            System.out.println(strTypeList);
            if (StringUtils.isNotBlank(strTypeList)) {
                String json = extractJsonArray(strTypeList);
                if (json != null) {
                    JSONArray arrayList = JSONArray.fromObject(json);
                    for (Object o : arrayList) {
                        System.out.println(buildMarketTypeInsert(JSONObject.fromObject(o)));
                    }
                }
            }
            return list;
        }

        /**
         * Returns the text from the first '[' to the last ']' of {@code segment}, inclusive,
         * or {@code null} when the segment does not contain such a bracket pair.
         */
        private static String extractJsonArray(String segment) {
            int start = segment.indexOf('[');
            int end = segment.lastIndexOf(']');
            if (start < 0 || end < start) {
                return null;
            }
            return segment.substring(start, end + 1);
        }

        /** Builds the {@code insert into n3b_vl_market_type} statement for one JSON entry. */
        private static String buildMarketTypeInsert(JSONObject object) {
            StringBuilder sb = new StringBuilder();
            sb.append("insert into n3b_vl_market_type values ");
            sb.append(" ( ");
            // The first column is the code value prefixed with '0'.
            sb.append("'0").append(escapeSql(object.get("code_value"))).append("'");
            sb.append(",'").append(escapeSql(object.get("code_name"))).append("'");
            sb.append(",'").append(escapeSql(object.get("code_no"))).append("'");
            sb.append(",'").append(escapeSql(object.get("code_value"))).append("'");
            sb.append(",'").append(escapeSql(object.get("valid_flag"))).append("'");
            sb.append(",'").append(escapeSql(object.get("version"))).append("'");
            sb.append(",'").append(escapeSql(object.get("code_type"))).append("'");
            sb.append(" ); ");
            return sb.toString();
        }

        /**
         * Renders {@code value} as text for use inside a single-quoted SQL string literal,
         * doubling any single quote so scraped data cannot terminate the literal early.
         */
        private static String escapeSql(Object value) {
            return String.valueOf(value).replace("'", "''");
        }

    }
  • 相关阅读:
    windows用户管理与远程管理
    Linux之基础网络配置
    linux之程序包管理
    linux之特殊权限
    设计模式学习笔记——Prototype原型模式
    设计模式学习笔记——Visitor 访问者模式
    XXX is not a function
    终于决定要开始写自己的博客了,先Mark一下
    element ui 与vue跨域通信操作 和框架语法,contentype
    跨域与版本控制
  • 原文地址:https://www.cnblogs.com/hwaggLee/p/5616229.html
Copyright © 2011-2022 走看看