zoukankan      html  css  js  c++  java
  • jsoup-处理html中的script数据

    /**
     * Scrapes the left-hand category data from the valueonline.cn laws page.
     * http://www.valueonline.cn/laws/laws?typeid=96219074211635284
     *
     * <p>The data is read from the text of the first {@code <script>} element of the page,
     * which is split on the literal {@code "var"}. The segment at index 3 is expected to
     * contain a JSON array of market-type records; each record is printed to standard output
     * as an {@code insert into n3b_vl_market_type} statement.
     *
     * <p>The segment at index 2 was previously exported the same way into
     * {@code n3b_vl_plate_type}; that export was disabled in the original code and is not
     * performed here.
     *
     * @author hwaggLee
     */
    public class UtilsHtmValueonLineType {

        /** Index (after splitting the script text on "var") of the market-type segment. */
        private static final int MARKET_TYPE_SEGMENT = 3;

        public static void main(String[] args) {
            String url = "http://www.valueonline.cn/laws/laws?typeid=96219074211635284";
            readHtml(url);
        }

        /**
         * Fetches {@code url}, extracts the market-type array from the first script element
         * and prints one SQL insert statement per record to standard output.
         *
         * <p>This is a best-effort scraper: if the page cannot be fetched, has no script
         * element, or does not have the expected layout, nothing is printed (other than the
         * fetch error line or the raw segment) and the method returns normally.
         *
         * @param url address of the page to scrape
         * @return a new, empty list; the current implementation never adds elements to it
         */
        public static List<Object> readHtml(String url){
            List<Object> list = new ArrayList<Object>();

            Document doc = null;
            try {
                doc = Jsoup.connect(url).get();
            } catch (Exception e) {
                // Deliberately best-effort: report the failure and fall through to the
                // empty result instead of propagating.
                System.out.println(e.getMessage()+":--------------->"+url);
            }
            if (doc == null) {
                return list;
            }

            Elements elScripts = doc.getElementsByTag("script");
            if (elScripts.isEmpty()) {
                // No script element at all: nothing to parse.
                return list;
            }

            // NOTE: splitting on the bare text "var" is tied to the current page layout;
            // any other occurrence of "var" in the script shifts the segment indices.
            String[] elScriptList = elScripts.get(0).data().split("var");
            if (elScriptList.length <= MARKET_TYPE_SEGMENT) {
                // Page layout differs from what is expected; avoid indexing past the end.
                return list;
            }

            String strTypeList = elScriptList[MARKET_TYPE_SEGMENT];
            System.out.println(strTypeList);
            if (StringUtils.isNotBlank(strTypeList)) {
                printMarketTypeInserts(strTypeList);
            }
            return list;
        }

        /** Parses the JSON array embedded in {@code segment} and prints one insert per record. */
        private static void printMarketTypeInserts(String segment) {
            int start = segment.indexOf("[");
            int end = segment.lastIndexOf("]");
            if (start < 0 || end < start) {
                // No well-formed "[ ... ]" span in this segment.
                return;
            }
            JSONArray arrayList = JSONArray.fromObject(segment.substring(start, end + 1));
            for (Object o : arrayList) {
                JSONObject object = JSONObject.fromObject(o);
                StringBuilder sb = new StringBuilder();
                sb.append("insert into n3b_vl_market_type values ");
                sb.append(" ( ");
                // First column is the code value with a leading "0".
                sb.append(quoted("0" + object.get("code_value")));
                sb.append(",").append(quoted(object.get("code_name")));
                sb.append(",").append(quoted(object.get("code_no")));
                sb.append(",").append(quoted(object.get("code_value")));
                sb.append(",").append(quoted(object.get("valid_flag")));
                sb.append(",").append(quoted(object.get("version")));
                sb.append(",").append(quoted(object.get("code_type")));
                sb.append(" ); ");
                System.out.println(sb.toString());
            }
        }

        /**
         * Renders {@code value} as a single-quoted SQL string literal, doubling any embedded
         * single quote so that scraped text cannot terminate the literal early.
         */
        private static String quoted(Object value) {
            return "'" + String.valueOf(value).replace("'", "''") + "'";
        }

    }
  • 相关阅读:
    unsupported jsonb version number 123
    如何在MPlayer上支持RTSP
    TDengine 时序数据库的 ADO.Net Core 提供程序 Maikebing.EntityFrameworkCore.Taos
    如何使用IoTSharp对接ModBus?
    如何从源码启动和编译IoTSharp
    Asp.Net Core 自动适应Windows服务、Linux服务、手动启动时的内容路径的扩展方法
    MQTTnet 的Asp.Net Core 认证事件的扩展
    Asp.Net Core 中利用QuartzHostedService 实现 Quartz 注入依赖 (DI)
    The remote certificate is invalid according to the validation procedure 远程证书验证无效
    settings插拔式源码
  • 原文地址:https://www.cnblogs.com/hwaggLee/p/5616229.html
Copyright © 2011-2022 走看看