zoukankan      html  css  js  c++  java
  • 第八周总结

    通过热词统计项目,我再一次进行了数据爬取。这次爬取成功了,同时也学习了用 Jsoup 解析 HTML 的技术。

    源码:

    import java.util.ArrayList;import java.util.Date;import java.util.List;

    import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;

    import com.gargoylesoftware.htmlunit.BrowserVersion;

    import com.gargoylesoftware.htmlunit.WebClient;

    import Dao.AddService;

     

     /**
      * Scraper entry point: loads a page through an HtmlUnit {@code WebClient},
      * re-parses the rendered markup with Jsoup and reads figures from each
      * {@code .province} element found under the first {@code .wrap} element.
      *
      * NOTE(review): this listing looks truncated and does not compile as shown:
      * the {@code for} loop is never closed, {@code real_newconfirm},
      * {@code real_confirm}, {@code real_heal} and {@code real_dead} are never
      * declared, and no import for {@code HtmlPage} or {@code IOException} is
      * visible above the class. Presumably the missing lines also hand the values
      * to the imported {@code AddService} -- confirm against the full source.
      */
     public class Paqu {

     

        /**
         * Runs the scrape once; prints a failure message if an IOException is caught.
         *
         * @param args command-line arguments; not read by the visible code
         */
        public static void main(String args[]) {

            // TODO Auto-generated method stub

            // Holders for one province's values. The visible code never assigns
            // sheng; the other four are overwritten on every loop iteration.
            String sheng="";

            String xinzeng="";

            String leiji="";

            String zhiyu="";

            String siwang="";

            // NOTE(review): the target address is empty in this listing, so
            // getPage(url) below has nothing to fetch -- fill in the real URL.
             String url = "";

            

            // Not used anywhere in the visible code.
            int i=0;

            

            try {

                // Build a WebClient that emulates the Chrome browser.

            // NOTE(review): webClient is never closed in the visible code.
            WebClient webClient = new WebClient(BrowserVersion.CHROME);

               // Do not throw on page script errors or on failing HTTP status codes.
               webClient.getOptions().setThrowExceptionOnScriptError(false);

             webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);

                // Timeout of 8000 (milliseconds per the HtmlUnit docs -- confirm for the version in use).
                webClient.getOptions().setTimeout(8000);

                HtmlPage rootPage = webClient.getPage(url);

                            // Give background JavaScript up to 6000 (ms, per the HtmlUnit docs) to finish
                            // before the DOM is serialized.
                            webClient.waitForBackgroundJavaScript(6000);

                // Serialize the rendered page and parse it again with Jsoup.
                String html = rootPage.asXml();

                Document doc = Jsoup.parse(html);

                //System.out.println(doc);

                // NOTE(review): first() yields null when nothing matches ".wrap",
                // which would make the next select(...) throw a NullPointerException.
                Element listdiv1 = doc.select(".wrap").first();

                Elements listdiv2 = listdiv1.select(".province");

                for(Element s:listdiv2) {

                    Elements span = s.getElementsByTag("span");

                    // Selected but not read again in the visible code (sheng is never set from it).
                    Elements real_name=span.select(".item_name");

                    

                    // NOTE(review): the four real_* variables below are not declared
                    // anywhere in this listing; their selectors appear to be missing.
                    xinzeng=real_newconfirm.text();

                    leiji=real_confirm.text();

                    zhiyu=real_heal.text();

                    siwang=real_dead.text();

       

                

            // NOTE(review): the closing brace of the for loop is missing before this catch.
            } catch (IOException e) {

                // NOTE(review): "e.printStackTrace();" below is part of the comment text
                // and therefore never runs; only the message on the next statement is printed.
                // TODO Auto-generated catch block            e.printStackTrace();

                System.out.println("爬取失败");

            }

        }

        

    }

       

      AddService.java:

    package Dao;

    import java.sql.Connection;
    import java.sql.PreparedStatement;
    import java.sql.SQLException;
    import java.sql.Statement;
    import java.util.regex.Pattern;

    import utils.DBUtils;

    public class AddService {

        public void add(String table,String sheng,String xinzeng,String leiji,String zhiyu,String dead,String time) {

            String sql = "insert into "+table+" (Province,Newconfirmed_num ,Confirmed_num,Cured_num,Dead_num,Time) values('" + sheng + "','" + xinzeng +"','" + leiji +"','" + zhiyu + "','" + dead+ "','" + time+ "')";

            System.out.println(sql);

            Connection conn = DBUtils.getConn();

            Statement state = null;

            int a = 0;

            try {

                state = conn.createStatement();

                a=state.executeUpdate(sql);

            } catch (Exception e) {

                e.printStackTrace();

            } finally {

                DBUtils.close(state, conn);

            }        

        }

    }

     

  • 相关阅读:
    XGBoost,GBDT原理详解,与lightgbm比较
    开机或联网时自启动gunicorn
    mac下查看jdk安装版本及安装目录
    Linux常用操作
    ssh远程登录出现Host key verification failed.解决办法
    gunicorn运行显示connection in use解决办法
    nginx,gunicorn常用命令
    Git入门--创建版本库,关联远程库,从远程库下载
    【新手向】阿里云上ubuntu+flask+gunicorn+nginx服务器部署(二)项目部署
    系统护肤+身体
  • 原文地址:https://www.cnblogs.com/2210633591zhang/p/13094673.html
Copyright © 2011-2022 走看看