zoukankan      html  css  js  c++  java
  • jsoup爬取某网站安全数据

    jsoup爬取某网站安全数据

    package com.vfsd.net;
    
    import java.io.IOException;
    import java.sql.SQLException;
    import java.util.Map;
    
    import javax.servlet.ServletException;
    import javax.servlet.annotation.WebServlet;
    import javax.servlet.http.HttpServlet;
    import javax.servlet.http.HttpServletRequest;
    import javax.servlet.http.HttpServletResponse;
    
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    
    import com.vfsd.dao.ManageMySQL;
    
    /**
     * Servlet implementation class GetURL13
     */
    @WebServlet("/GetURL13")
    public class GetURL13 extends HttpServlet {
        private static final long serialVersionUID = 1L;
           
        /**
         * @see HttpServlet#HttpServlet()
         */
        public GetURL13() {
            super();
            // TODO Auto-generated constructor stub
        }
        private String message;
        
        @Override
        public void init() throws ServletException {
            message = "Hello world, this message is from servlet!";
            System.out.println("------"+message);
            try {
                ManageMySQL.getConnection();
                
            } catch (SQLException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            } catch (Exception e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
        /**
         * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
         */
        protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
            // TODO Auto-generated method stub
            //response.getWriter().append("Served at: ").append(request.getContextPath());
            String agent1 = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36";
            
            int pageNum=1;
            int pageSize=10;
            //for(pageNum=1;pageNum<101;pageNum++)
            for(pageNum=1;pageNum<924;pageNum++)
            {
                try {
                    int page1= (pageNum-1)*pageSize;
                    Map<Integer,String> map1 = ManageMySQL.getNewsLinkInTable(page1,pageSize,"data_bjszfhcxjswyh");
                    for(Integer key : map1.keySet())
                    {
                        System.out.println(key+"  "+map1.get(key));
                        String news_link = map1.get(key);
                        String context1="";
                        String source1="";
                        String publishDate = "";
                        //String context1 = getContentByURL(news_link).replace(" ", "");
                        
                        if(!news_link.contains("void"))
                        {
                            if(news_link.endsWith("html"))
                            {
                                Document documentRoot = Jsoup.connect(news_link).userAgent(agent1).get();
                                Elements elements2 = documentRoot.select("#content_list");
                                //Elements elements2_1 = documentRoot.select("div.div_right");
                                if(elements2.size()==1)
                                {
                                    Element div_ele = elements2.get(0);
                                    context1 = div_ele.text();
                                    ManageMySQL.updateContextAndPublishDate2(key, context1.replace("'", "").replace(""", ""),source1,publishDate,"data_bjszfhcxjswyh");
                                }
                                
                                
                            }
                            
                        }
                        
                    }
                } catch (Exception e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
                
            }
        }
    
    
    
        /**
         * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
         */
        protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
            // TODO Auto-generated method stub
            doGet(request, response);
        }
    
    }
  • 相关阅读:
    C#中IPAddress转换成整型int
    没有注册类 (异常来自 HRESULT:0x80040154 (REGDB_E_CLASSNOTREG))
    VB.NET或C#报错:You must have a license to use this ActiveX control.
    c#几种随机数组和数组乱序
    C#封装的websocket协议类
    VB生成条形码(EAN-13)
    VB控件间的拖放
    VB用API模拟截屏键PrintScreen
    VB读写进程的内存
    几个VB常见又内涵的错误
  • 原文地址:https://www.cnblogs.com/herd/p/11784128.html
Copyright © 2011-2022 走看看