zoukankan      html  css  js  c++  java
  • 顶会热词统计

    任务分解: 

    1.          爬取CVPR2019论文到数据库中;
    2.          分析、查找出关键词,并排序;
    3.          生成热词云图;
    4.          点击热词云中的热词可以找到与之对应的文章题目;

    一、python爬虫

    分析会议网站的HTML页面,利用Python爬取数据,然后存储到MySQL数据库中。爬取的数据为论文的题目和摘要内容,关键词通过将题目拆分获得。

    import requests
    import pymysql
    from bs4 import BeautifulSoup
    
    # Crawl the CVPR 2019 open-access index, fetch each paper's abstract page and
    # store title / link / abstract rows in the local MySQL table `lunwen`.

    # Connect to the local database.
    # Keyword arguments are used for every option: PyMySQL 1.0+ no longer
    # accepts the host as a positional argument.
    db = pymysql.connect(host='127.0.0.1',
                         port=3306,
                         user='root',
                         password='root',
                         database='test',
                         charset='utf8')

    # Request headers (browser-like User-Agent) sent with every HTTP request.
    headers={
            "User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
            }
    # Seconds to wait for a response, so one stalled request cannot hang the crawl.
    timeout = 30

    # Fetch the index page with a GET request.
    url="http://openaccess.thecvf.com/CVPR2019.py"
    html=requests.get(url, headers=headers, timeout=timeout)
    html.raise_for_status()

    # Parse the page with Beautiful Soup.
    soup=BeautifulSoup(html.content,'html.parser')

    # Each paper on the index page has an <a> whose text is "pdf".
    pdfs=soup.find_all(name="a",text="pdf")

    lis = []
    jianjie=""
    for i,pdf in enumerate(pdfs):
        # Derive the paper name from the PDF file name and rebuild the URL of
        # its HTML abstract page.
        pdf_name=pdf["href"].split('/')[-1]
        name=pdf_name.split('.')[0].replace("_CVPR_2019_paper","")
        link="http://openaccess.thecvf.com/content_CVPR_2019/html/"+name+"_CVPR_2019_paper.html"
        url1=link
        print(url1)
        html1 = requests.get(url1, headers=headers, timeout=timeout)
        soup1 = BeautifulSoup(html1.content, 'html.parser')
        weizhi = soup1.find('div', attrs={'id':'abstract'})
        # Reset per paper: a page without an abstract must not inherit the
        # abstract of the previously crawled paper.
        jianjie = weizhi.get_text() if weizhi else ""
        print("ok")
        info = {}
        info['title'] = name
        info['link'] =link
        info['abstract']=jianjie

        lis.append(info)

    # NOTE(review): only title, link and abstract are written here; code that
    # reads a `keywords` column from `lunwen` presumably relies on a separate
    # step to fill it - confirm.
    columns = ('title', 'link', 'abstract')
    cols = ", ".join('`{}`'.format(k) for k in columns)
    val_cols = ', '.join('%({})s'.format(k) for k in columns)
    res_sql = "insert into lunwen(%s) values(%s)" % (cols, val_cols)
    print(res_sql)

    cursor = db.cursor()
    try:
        for info in lis:
            # The row dict supplies the named %(column)s parameters.
            cursor.execute(res_sql, info)
        # One commit for the whole batch instead of one per row.
        db.commit()
        print("ok")
    finally:
        cursor.close()
        db.close()
    View Code

    数据库的结构:(原文此处为截图。由下文代码可知,表 lunwen 至少包含 title、link、abstract、keywords 四个字段。)

    二、提取关键词

    dao层:

    package dao;
    
    
    import java.sql.Connection;
    import java.sql.PreparedStatement;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;
    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.LinkedHashMap;
    import java.util.Map;
    import java.util.stream.Collectors;
    
    import Bean.Data;
    
    import java.sql.Connection;
    import java.sql.PreparedStatement;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;
    import java.util.List;
    import jdbc.*;
    
    
    
    /**
     * Data-access helpers for the {@code lunwen} table.
     *
     * <p>Every method obtains its connection from {@code Util.getConn()} and
     * releases the connection, statement and result set through
     * try-with-resources, so nothing is leaked when a query fails.
     * SQL errors are printed and the result gathered so far is returned.
     */
    public class Dao {

        /**
         * Counts how often each keyword occurs across all rows.
         *
         * <p>The {@code keywords} column is split on commas; tokens are trimmed
         * and empty tokens are skipped. Rows whose column is NULL are ignored.
         *
         * @return keyword to occurrence count, iterated from most to least frequent
         */
        public static Map<String,Integer> getrc()
        {
            String sql="select * from lunwen";
            Map<String, Integer> counts = new HashMap<String, Integer>();
            try (Connection con = Util.getConn();
                 Statement state = con.createStatement();
                 ResultSet rs = state.executeQuery(sql)) {
                while (rs.next()) {
                    String keywords = rs.getString("keywords");
                    if (keywords == null) {
                        continue;
                    }
                    for (String raw : keywords.split(",")) {
                        String word = raw.trim();
                        if (!word.isEmpty()) {
                            // The first occurrence counts as 1, later ones add 1.
                            counts.merge(word, 1, Integer::sum);
                        }
                    }
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }

            // LinkedHashMap keeps the descending-count order produced by the sort.
            Map<String, Integer> results = new LinkedHashMap<String, Integer>();
            counts.entrySet()
                .stream()
                .sorted((p1, p2) -> p2.getValue().compareTo(p1.getValue()))
                .forEachOrdered(ele -> results.put(ele.getKey(), ele.getValue()));
            return results;
        }


        /**
         * Finds the papers whose {@code keywords} column contains the given text.
         *
         * <p>The search text is bound as a statement parameter, so it cannot alter
         * the SQL. The LIKE wildcards {@code %} and {@code _} inside the text are
         * not escaped and keep their wildcard meaning.
         *
         * @param keywords text to search for; {@code null} yields an empty list
         * @return matching papers; each carries the search text as its keywords value
         */
        public List<Data> list(String keywords) {
            List<Data> list = new ArrayList<Data>();
            if (keywords == null) {
                return list;
            }
            String sql = "select * from lunwen where keywords like ?";

            try (Connection conn = Util.getConn();
                 PreparedStatement pst = conn.prepareStatement(sql)) {
                pst.setString(1, "%" + keywords + "%");
                try (ResultSet rs = pst.executeQuery()) {
                    while (rs.next()) {
                        String title = rs.getString("title");
                        String link = rs.getString("link");
                        String as = rs.getString("abstract");
                        list.add(new Data(title, link, as, keywords));
                    }
                }
            } catch (SQLException e1) {
                e1.printStackTrace();
            }
            return list;
        }

        /**
         * Loads every paper in the table.
         *
         * @return all rows as {@code Data} objects, in the order the database returns them
         */
        public static List<Data> list2() {
            List<Data> list2 = new ArrayList<Data>();
            String sql = "select * from lunwen ";

            try (Connection conn = Util.getConn();
                 PreparedStatement pst = conn.prepareStatement(sql);
                 ResultSet rs = pst.executeQuery()) {
                while (rs.next()) {
                    String title = rs.getString("title");
                    String link = rs.getString("link");
                    String as = rs.getString("abstract");
                    String keywords = rs.getString("keywords");
                    list2.add(new Data(title, link, as, keywords));
                }
            } catch (SQLException e1) {
                e1.printStackTrace();
            }
            return list2;
        }
    }
    dao

    bean:

    package Bean;
    
    /**
     * Bean for one paper row (title, link, abstract, keywords), also usable as a
     * title/value pair through the two-argument constructor.
     *
     * <p>Each property has a conventional {@code setXxx} method so that bean
     * introspection treats it as writable. The original capitalised
     * {@code SetXxx} methods are kept so existing callers still compile.
     */
    public class Data {

        private String title;
        private String link;
        private String as;
        private String keywords;
        private int value;

        /**
         * Creates a paper entry; {@code value} stays 0.
         *
         * @param title    paper title
         * @param link     URL of the paper page
         * @param as       abstract text
         * @param keywords keywords string
         */
        public Data(String title, String link, String as, String keywords) {
            this.title = title;
            this.link = link;
            this.as = as;
            this.keywords = keywords;
        }

        /**
         * Creates a title/value pair; the remaining fields stay {@code null}.
         *
         * @param title label
         * @param value numeric value attached to the label
         */
        public Data(String title, int value) {
            this.title = title;
            this.value = value;
        }

        public int getValue() {
            return this.value;
        }

        public void setValue(int value) {
            this.value = value;
        }

        /** Legacy name, kept for existing callers; same as {@link #setValue(int)}. */
        public void SetValue(int value) {
            setValue(value);
        }

        public String getTitle() {
            return this.title;
        }

        public void setTitle(String title) {
            this.title = title;
        }

        /** Legacy name, kept for existing callers; same as {@link #setTitle(String)}. */
        public void SetTitle(String title) {
            setTitle(title);
        }

        public String getLink() {
            return this.link;
        }

        public void setLink(String link) {
            this.link = link;
        }

        /** Legacy name, kept for existing callers; same as {@link #setLink(String)}. */
        public void SetLink(String link) {
            setLink(link);
        }

        public String getAs() {
            return this.as;
        }

        public void setAs(String as) {
            this.as = as;
        }

        /** Legacy name, kept for existing callers; same as {@link #setAs(String)}. */
        public void SetAs(String as) {
            setAs(as);
        }

        public String getKeywords() {
            return this.keywords;
        }

        public void setKeywords(String keywords) {
            this.keywords = keywords;
        }

        /** Legacy name, kept for existing callers; same as {@link #setKeywords(String)}. */
        public void SetKeywords(String keywords) {
            setKeywords(keywords);
        }

        /**
         * Returns a short description for log output. The abstract is left out
         * to keep the line short.
         */
        @Override
        public String toString() {
            return "Data{title='" + title + "', link='" + link
                    + "', keywords='" + keywords + "', value=" + value + "}";
        }
    }
    bean

    三、热词云可视化展示:

    <%@ page language="java" contentType="text/html; charset=UTF-8"
        pageEncoding="UTF-8"%>
    <%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
    <%--
      Hot-word page: the left pane lists the papers passed in the "list" request
      attribute, the right pane renders a word cloud loaded from RcServlet.
      Clicking a word navigates to ClickServlet with that word as "keywords".
    --%>
    <%request.setCharacterEncoding("utf-8"); 
    response.setCharacterEncoding("utf-8");%>
    <!DOCTYPE html>
    <html>
    <head>
    <meta charset="UTF-8">
    <title>Hot-words</title>
    <link type="text/css" rel="stylesheet" href="css/style.css">
    <script src="js/jquery-3.4.1.min.js"></script>
    <script src="js/echarts.min.js"></script>
    <script src="js/echarts-cloud.js"></script>
    <style>
        /* Word-cloud pane on the right. */
        #main {
            width: 30%;
            height: 500px;
            border: 1px solid #ddd;
            float: right;
        }
        /* Scrollable paper list on the left.
           The former margin-top/padding-top of "100dp" are omitted: "dp" is not
           a CSS unit, so browsers ignored both declarations. */
        #table {
            overflow-x: auto;
            overflow-y: auto;
            width: 60%;
            height: 500px;
            float: left;
        }
    </style>
    </head>

    <body>
    <br>
    <h1>Hot-words</h1>
    <br>
    <br>
    <br>

    <div id="table">
      <table id='gradient-style'>
        <tr>
          <th align="center">link</th>
        </tr>
        <%-- c:out escapes the stored values so they cannot inject markup. --%>
        <c:forEach var="item" items="${list}">
          <tr>
            <td><a href="<c:out value='${item.link}'/>"><c:out value="${item.title}"/></a></td>
          </tr>
        </c:forEach>
      </table>
    </div>

    <div id="main">
    </div>

    <script type="text/javascript">
        var dt;

        // Load the hot words as JSON and draw the word cloud.
        $.ajax({
            url : "RcServlet",
            async : true,
            type : "POST",
            data : {},
            dataType : "json",
            success : function(data) {
                dt = data;

                // Keep only the name/value pair of each entry for the series data.
                var mydata = [];
                for (var i = 0; i < dt.length; i++) {
                    mydata.push({
                        name : dt[i].name,
                        value : dt[i].value
                    });
                }

                var myChart = echarts.init(document.getElementById('main'));
                myChart.setOption({
                    title : {
                        text : ''
                    },
                    tooltip : {},
                    series : [{
                        type : 'wordCloud',
                        shape : 'smooth',
                        gridSize : 8,
                        size : ['50%', '50%'],
                        //sizeRange : [ 50, 100 ],
                        rotationRange : [-45, 0, 45, 90],
                        textStyle : {
                            normal : {
                                fontFamily : '微软雅黑',
                                // Random RGB colour for each word.
                                color : function() {
                                    return 'rgb(' +
                                        Math.round(Math.random() * 255) +
                                        ', ' + Math.round(Math.random() * 255) +
                                        ', ' + Math.round(Math.random() * 255) + ')';
                                }
                            },
                            emphasis : {
                                shadowBlur : 5,
                                shadowColor : '#333'
                            }
                        },
                        left : 'center',
                        top : 'center',
                        right : null,
                        bottom : null,
                        width : '100%',
                        height : '100%',
                        data : mydata
                    }]
                });

                // Clicking a word opens the list of papers for it; the word is
                // URL-encoded so spaces, '&', '+' and similar survive the query string.
                myChart.on('click', function(params) {
                    var url = "ClickServlet?keywords=" + encodeURIComponent(params.name);
                    window.location.href = url;
                });

                alert("成功!");
            },
            error : function() {
                alert("请求失败");
            }
        });
    </script>

    </body>
    </html>
    JSP

    四、点击热词之后的跳转:

    clickservlet:

    package servlet;
    
    import java.io.IOException;
    import java.util.List;
    
    import javax.servlet.ServletException;
    import javax.servlet.annotation.WebServlet;
    import javax.servlet.http.HttpServlet;
    import javax.servlet.http.HttpServletRequest;
    import javax.servlet.http.HttpServletResponse;
    
    import Bean.Data;
    import dao.Dao;
    
    /**
     * Servlet implementation class ClickServlet
     */
    @WebServlet("/ClickServlet")
    public class ClickServlet extends HttpServlet {
        private static final long serialVersionUID = 1L;
           
        /**
         * @see HttpServlet#HttpServlet()
         */
        public ClickServlet() {
            super();
            // TODO Auto-generated constructor stub
        }
    
        /**
         * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
         */
        protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
            // TODO Auto-generated method stub
            response.getWriter().append("Served at: ").append(request.getContextPath());
            doPost(request, response);
        }
    
        /**
         * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
         */
        protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
            System.out.println("ClickServlet 运行");
            request.setCharacterEncoding("UTF-8");
            String keywords=request.getParameter("keywords");
            Dao dao = new Dao();
            List<Data> list=null;
            list=dao.list(keywords);
            System.out.println(list);
            request.setAttribute("list",list); 
            
            request.getRequestDispatcher("Rc.jsp").forward(request, response);
        }
    
    }
    clickservlet

    linkservlet:

    package servlet;
    
    import java.io.IOException;
    import java.util.List;
    
    import javax.servlet.ServletException;
    import javax.servlet.annotation.WebServlet;
    import javax.servlet.http.HttpServlet;
    import javax.servlet.http.HttpServletRequest;
    import javax.servlet.http.HttpServletResponse;
    
    import Bean.Data;
    import dao.Dao;
    
    /**
     * Servlet implementation class Linkservlet
     */
    @WebServlet("/Linkservlet")
    public class Linkservlet extends HttpServlet {
        private static final long serialVersionUID = 1L;
           
        /**
         * @see HttpServlet#HttpServlet()
         */
        public Linkservlet() {
            super();
            // TODO Auto-generated constructor stub
        }
    
        /**
         * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
         */
        protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
            // TODO Auto-generated method stub
            System.out.println("LinkServlet 运行");
            response.getWriter().append("Served at: ").append(request.getContextPath());
            response.setContentType("text/html;charset=UTF-8");
            request.setCharacterEncoding("UTF-8");
            String method = request.getParameter("method");
            if(method.equals("find"))
            {
                find(request,response);
            }
        }
    
        /**
         * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
         */
        private void find(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
            
            response.setContentType("text/html;charset=UTF-8");
            request.setCharacterEncoding("UTF-8");
            List<Data> list=null;
            list=Dao.list2();
            System.out.println(list);
            request.setAttribute("list", list);
            request.getRequestDispatcher("Rc.jsp").forward(request,response);
            
        }
        protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
            // TODO Auto-generated method stub
            doGet(request, response);
            
        }
    }
    linkservlet

    rcservlet:

    package servlet;
    
    import java.io.IOException;
    import java.sql.Date;
    import java.util.List;
    import java.util.Map;
    
    import javax.servlet.ServletException;
    import javax.servlet.annotation.WebServlet;
    import javax.servlet.http.HttpServlet;
    import javax.servlet.http.HttpServletRequest;
    import javax.servlet.http.HttpServletResponse;
    
    import Bean.Data;
    import dao.Dao;
    import net.sf.json.JSONArray;
    import net.sf.json.JSONObject;
    
    @WebServlet("/RcServlet")
    public class RcServlet extends HttpServlet {
        
        private static final long serialVersionUID = 1L;
           
        /**
         * @see HttpServlet#HttpServlet()
         */
        public RcServlet() {
            super();
            // TODO Auto-generated constructor stub
        }
    
        /**
         * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
         */
        protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    
            this.doPost(request, response);
        }
    
        /**
         * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
         */
        protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
            System.out.println("Rcservlet 运行");
            response.setContentType("text/html;charset=UTF-8");
            request.setCharacterEncoding("UTF-8");
            Map<String, Integer>sortMap=Dao.getrc();
            JSONArray json =new JSONArray();
            int k=0;
            for (Map.Entry<String, Integer> entry : sortMap.entrySet()) 
            {
                JSONObject ob=new JSONObject();
                ob.put("name", entry.getKey());
                ob.put("value", entry.getValue());
                if(!(entry.getKey().equals("for")||entry.getKey().equals("and")||entry.getKey().equals("With")||entry.getKey().equals("of")||entry.getKey().equals("in")||entry.getKey().equals("From")||entry.getKey().equals("A")||entry.getKey().equals("to")||entry.getKey().equals("a")||entry.getKey().equals("the")||entry.getKey().equals("by")))
                {
                    json.add(ob);
                    k++;
                }
                if(k==10)
                    break;
            }
            System.out.println(json.toString());//输出JSON数据
            response.getWriter().write(json.toString());
            
        }
        
    }
    rcservlet

    五、结果展示:

  • 相关阅读:
    zendstudio xdebug 配置
    一键清除cvs/svn 目录
    mysql 引擎区分
    ngnix 配置
    linux下mysql安装、目录结构、配置
    tomacat 配置ssl协议
    HTML中<title>与<h1>区别
    HTML中<strong>与<b>,<em>与<i>标签的区别
    bootstrap的总结1
    JavaScript的DOM(文档对象)基础语法总结2
  • 原文地址:https://www.cnblogs.com/studya/p/13070544.html
Copyright © 2011-2022 走看看