使用 Python 对论文网站进行爬取,然后分析出所有论文中出现次数最多的词;在 Web 端,通过可视化视图将热词以词云的形式显示出来,点击词云中的热词,可以弹出与该热词相关的文章列表。
package com.pjh.dao;

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.pjh.domain.Cloud;
import com.pjh.util.BaseConnection;

/**
 * Data-access helper that loads the entries displayed by the word-cloud page.
 */
public class DBUtil {

    private static final Logger LOGGER = Logger.getLogger(DBUtil.class.getName());

    private static final String QUERY_ALL = "SELECT * FROM lunwen";

    /**
     * 1-based position of the column read from every row.
     *
     * <p>NOTE(review): the column is addressed by position on a {@code SELECT *}, so any change
     * to the table layout silently changes what is read. Presumably this is the title column;
     * confirm against the schema and switch to an explicit column name.
     */
    private static final int TITLE_COLUMN_INDEX = 4;

    /**
     * Reads every row of the {@code lunwen} table and wraps the string value of its fourth
     * column in a {@link Cloud}.
     *
     * <p>Database failures are logged, not propagated. If no connection can be obtained the
     * result is empty; if the query fails part-way, the entries read so far are returned.
     *
     * @return a mutable list of entries in result-set order; never {@code null}
     */
    public static List<Cloud> queryCloud() {
        List<Cloud> list = new ArrayList<>();

        Connection conn = BaseConnection.getConnection();
        if (conn == null) {
            // getConnection() signals failure by returning null; without this guard the
            // createStatement() call below would throw a NullPointerException.
            LOGGER.warning("No database connection available; returning an empty word list");
            return list;
        }

        Statement statement = null;
        ResultSet rs = null;
        try {
            statement = conn.createStatement();
            rs = statement.executeQuery(QUERY_ALL);
            while (rs.next()) {
                list.add(new Cloud(rs.getString(TITLE_COLUMN_INDEX)));
            }
        } catch (SQLException e) {
            LOGGER.log(Level.SEVERE, "Failed to read entries from table lunwen", e);
        } finally {
            BaseConnection.close(rs, statement, conn);
        }
        return list;
    }
}
package com.domain; public class Cloud { private String title; public Cloud(String title) { super(); this.title = title; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } }
package com.servlet; import java.io.IOException; import java.util.List; import javax.servlet.ServletException; import javax.servlet.annotation.WebServlet; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import com.pjh.domain.Cloud; import com.pjh.dao.DBUtil;; @WebServlet("/cloudServlet") public class cloudServlet extends HttpServlet { private static final long serialVersionUID = 1L; public cloudServlet() { super(); } protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { response.setContentType("text/html;charset=UTF-8"); request.setCharacterEncoding("UTF-8"); String method = request.getParameter("method"); //System.out.print(method); if(method.equals("pc")) { add(request,response); } } private void add(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { response.setContentType("text/html;charset=UTF-8"); request.setCharacterEncoding("UTF-8"); List<Cloud> list = DBUtil.queryCloud(); System.out.println(list); request.setAttribute("list", list); request.getRequestDispatcher("wordCloud.jsp").forward(request,response); } protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { // TODO Auto-generated method stub doGet(request, response); } }
package com.util; import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; public class BaseConnection { public static Connection getConnection(){ Connection conn=null; String driver = "com.mysql.cj.jdbc.Driver"; String url = "jdbc:mysql://localhost:3306/papercrawl?serverTimezone=UTC&characterEncoding=utf8&useSSL=true"; String user = "root"; String password = "123456"; try{ Class.forName(driver); conn=DriverManager. getConnection(url,user,password); }catch(Exception e){ e.printStackTrace(); } return conn; } public static void close (Statement state, Connection conn) { if (state != null) { try { state.close(); } catch (SQLException e) { e.printStackTrace(); } } if (conn != null) { try { conn.close(); } catch (SQLException e) { e.printStackTrace(); } } } public static void close (ResultSet rs, Statement state, Connection conn) { if (rs != null) { try { rs.close(); } catch (SQLException e) { e.printStackTrace(); } } if (state != null) { try { state.close(); } catch (SQLException e) { e.printStackTrace(); } } if (conn != null) { try { conn.close(); } catch (SQLException e) { e.printStackTrace(); } } } }
<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@ page import="java.util.*" %>
<%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<%--
  Word-cloud page.

  Reads the request attribute "list" (items exposing a "title" property), draws the titles as
  an ECharts word cloud inside #main, and submits the clicked word to the external search form
  #test_form, which opens in a new tab. The first form asks /cloudServlet?method=pc to render
  this page with data.
--%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html style="height:100%;">
<head>
<meta charset="utf-8">
<title>论文热词云</title>
<script src="js/jquery.min.js"></script>
<script src="js/echarts.js"></script>
<script src="js/worldcloud.js"></script>
</head>
<body>
<div id="main" style="width: 100%; height: 600px"></div>

<%--
  Titles are written as HTML-escaped text and read back from the DOM. Interpolating them
  directly into a quoted JavaScript literal breaks the script as soon as a title contains an
  apostrophe and allows script injection through database content.
--%>
<ul id="cloudTitles" style="display: none">
<c:forEach items="${requestScope.list}" var="item">
<li><c:out value="${item.title}"/></li>
</c:forEach>
</ul>

<script>
    // Build the chart data from the hidden list rendered above.
    var gdata1 = [];
    var titleNodes = document.getElementById('cloudTitles').getElementsByTagName('li');
    for (var i = 0; i < titleNodes.length; i++) {
        gdata1.push({
            name: titleNodes[i].textContent,
            // NOTE(review): the weight is random, so word size does not reflect how often a
            // word occurs; supply a real count from the server if frequency should be shown.
            value: Math.ceil(Math.random() * 1000000)
        });
    }

    // Returns a random rgb(...) colour string for one word.
    function randomColor() {
        return 'rgb(' +
            Math.round(Math.random() * 255) + ', ' +
            Math.round(Math.random() * 255) + ', ' +
            Math.round(Math.random() * 255) + ')';
    }

    // Draws the cloud; maskImage is optional and omitted from the series when not given.
    function renderCloud(myChart, words, maskImage) {
        var series = {
            type: 'wordCloud',
            gridSize: 1,
            sizeRange: [12, 55],
            rotationRange: [-45, 0, 45, 90],
            textStyle: {
                normal: {
                    color: randomColor
                }
            },
            left: 'center',
            top: 'center',
            right: null,
            bottom: null,
            data: words
        };
        if (maskImage) {
            series.maskImage = maskImage;
        }
        myChart.setOption({
            backgroundColor: '#fff',
            tooltip: {
                show: false
            },
            series: [series]
        });
    }

    window.onload = function () {
        var data = {
            value: gdata1,
            image: ""
        };
        var myChart = echarts.init(document.getElementById('main'));
        myChart.on('click', eConsole);

        // An image with an empty src never fires "load", so drawing only from the load
        // handler would leave the chart blank. Draw immediately when no mask is configured,
        // and fall back to an unmasked cloud if the mask fails to load.
        if (!data.image) {
            renderCloud(myChart, data.value, null);
            return;
        }
        var maskImage = new Image();
        maskImage.onload = function () {
            renderCloud(myChart, data.value, maskImage);
        };
        maskImage.onerror = function () {
            renderCloud(myChart, data.value, null);
        };
        maskImage.src = data.image;
    };

    // Click handler: copies the clicked word into the search form and submits it.
    function eConsole(param) {
        if (typeof param.seriesIndex == 'undefined') {
            return;
        }
        if (param.type == 'click') {
            document.getElementById('txt').value = param.name;
            document.getElementById('test_form').submit();
        }
    }
</script>

<form name="f_checkbox" action="${pageContext.request.contextPath}/cloudServlet?method=pc" method="post" id="registForm">
<button type="submit">生成热词云</button>
</form>

<script>
    // The search form is only submitted programmatically, so its controls stay hidden.
    $(function () {
        $("#txt").hide();
        $("#Search").hide();
    });
</script>

<form id="test_form" action="http://openaccess.thecvf.com/ICCV2019_search.py" method="post" accept-charset="utf-8" target="_blank">
<input type="text" id="txt" name="query">
<input type="submit" id="Search" value="Search">
</form>
</body>
</html>