热词统计cvpr2019
<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<%--
  Hot-word page: renders a word cloud of keywords returned by "PaperServlet_"
  and, below it, a table of paper links taken from the "list" attribute.
  Clicking a word navigates to "ClickServlet" with the word as the
  "geunjian" request parameter.

  NOTE(review): both a local echarts.js and the CDN echarts.simple.js are
  loaded; the later script presumably replaces the global "echarts" object.
  Left untouched because the local files are not visible here - confirm
  which build is actually required.
--%>
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Insert title here</title>
<link rel="stylesheet" href="css/bootstrap.min.css" type="text/css" />
<script src="jquery-3.4.1.js" type="text/javascript"></script>
<script type="text/javascript" src="echarts.js"></script>
<script type="text/javascript" src="js/china.js"></script>
<script src="js/bootstrap.min.js" type="text/javascript"></script>
<script src='https://cdn.bootcss.com/echarts/3.7.0/echarts.simple.js'></script>
<script src='js/echarts-wordcloud.js'></script>
</head>
<body>
    <!-- Container for the word cloud; ECharts needs an explicit width and height. -->
    <div id="main" style="width: 100%;height: 400px"></div>
    <div>
        <table class="table" style="width: 100%;align-content: center;">
            <tr>
                <th align="center">论文连接</th>
            </tr>
            <c:forEach var="item" items="${list}">
                <tr>
                    <%-- c:out escapes the scraped link and title so they cannot inject markup. --%>
                    <td><a href="<c:out value='${item.lianjie}'/>"><c:out value="${item.title}"/></a></td>
                </tr>
            </c:forEach>
        </table>
    </div>
    <script>
        var chart = echarts.init(document.getElementById('main'));

        // Builds the word-cloud option from the servlet response and draws it.
        // "words" is expected to be an array of objects with "name" and "value".
        function renderWordCloud(words) {
            var mydata = [];
            for (var i = 0; i < words.length; i++) {
                mydata.push({
                    name : words[i].name,
                    value : words[i].value
                });
            }

            chart.setOption({
                tooltip : {},
                series : [ {
                    type : 'wordCloud',
                    gridSize : 2,
                    sizeRange : [ 20, 50 ],
                    rotationRange : [ -90, 90 ],
                    shape : 'pentagon',
                    width : 600,
                    height : 300,
                    drawOutOfBound : true,
                    textStyle : {
                        normal : {
                            // Random dark-ish colour per word (each channel 0..160).
                            color : function() {
                                return 'rgb(' + [
                                    Math.round(Math.random() * 160),
                                    Math.round(Math.random() * 160),
                                    Math.round(Math.random() * 160)
                                ].join(',') + ')';
                            }
                        },
                        emphasis : {
                            shadowBlur : 10,
                            shadowColor : '#333'
                        }
                    },
                    data : mydata
                } ]
            });
        }

        // Asynchronous request: the chart is drawn only once data has arrived,
        // so a failed request no longer leads to reading ".length" of undefined.
        $.ajax({
            url : "PaperServlet_",
            type : "POST",
            dataType : "json",
            success : function(data) {
                renderWordCloud(data || []);
            },
            error : function() {
                alert("请求失败");
            }
        });

        // Navigate to the paper list for the clicked word; the word is encoded
        // so that special characters cannot break the query string.
        chart.on('click', function(params) {
            window.location.href = "ClickServlet?geunjian=" + encodeURIComponent(params.name);
        });

        // Call resize through the instance; assigning chart.resize directly
        // would invoke it without its "this" binding.
        window.onresize = function() {
            chart.resize();
        };
    </script>
</body>
</html>
package utils;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import dao.dao;
import entity.Cvf;

/**
 * Scrapes the paper index obtained through {@code HttpClientPool}, fetches each
 * paper page, extracts its abstract, derives the most frequent non-stop-word of
 * title plus abstract as the keyword, and stores the result through {@code dao}.
 *
 * @author 张志伟
 */
public class Jsouputil {

    /** Prefix prepended to the relative paper links found on the index page. */
    private static final String BASE_URL = "http://openaccess.thecvf.com/";

    /**
     * Timeout in milliseconds for connecting, leasing a pooled connection and
     * waiting for data. The previous literals ({@code 10000*10000} and the
     * int-overflowing {@code 100000*1000000}) were not meaningful timeouts.
     */
    private static final int TIMEOUT_MS = 100 * 1000;

    /** One or more consecutive characters outside {@code [a-zA-Z0-9_]}. */
    private static final Pattern NON_WORD = Pattern.compile("\\W+");

    /** Common English words that must never be chosen as the keyword. */
    private static final Set<String> STOP_WORDS = new HashSet<String>(Arrays.asList(
            "a", "the", "to", "and", "in", "of", "our", "your", "we", "is",
            "on", "for", "that", "an", "are"));

    /**
     * Parses the index page, then downloads and stores every listed paper.
     *
     * <p>For each {@code dt.ptitle} entry the title and link are read, the paper
     * page is requested, and on HTTP 200/302 the text of {@code div#abstract} is
     * extracted and a keyword computed. On any other status the abstract and
     * keyword are stored as {@code null} and the status code is printed.
     *
     * @throws Exception if fetching, URI construction, parsing or storing fails;
     *                   the first failure aborts the remaining papers
     */
    public static void testSelector() throws Exception {
        HttpClientPool httpClientPool = new HttpClientPool();
        // Connection pool shared by the index request and all paper requests.
        PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
        // HTML of the index page.
        String content = httpClientPool.doGet(cm);
        Document doc = Jsoup.parse(content);
        // Attribute selectors: each matching <dt> holds one paper title link.
        Elements elements = doc.select("div[id=content]").select("dl").select("dt[class=ptitle]");
        System.out.println(elements.toString());

        dao paperDao = new dao();
        RequestConfig config = RequestConfig.custom()
                .setConnectTimeout(TIMEOUT_MS)            // max time to establish the connection
                .setConnectionRequestTimeout(TIMEOUT_MS)  // max time to lease a connection from the pool
                .setSocketTimeout(TIMEOUT_MS)             // max idle time while transferring data
                .build();

        // One client for the whole run instead of one (never closed) client per paper.
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();
        try {
            for (Element ele : elements) {
                Elements link = ele.select("a");
                String cname = link.text();
                System.out.println(cname);
                String chref = BASE_URL.concat(link.attr("href"));
                System.out.println(chref);

                String cabstract = null;
                String ckeyword = null;

                HttpGet httpGet = new HttpGet(new URIBuilder(chref).build());
                httpGet.setConfig(config);
                CloseableHttpResponse response = httpClient.execute(httpGet);
                try {
                    int status = response.getStatusLine().getStatusCode();
                    if ((status == 200 || status == 302) && response.getEntity() != null) {
                        // Parse the body that was just downloaded rather than fetching
                        // the same URL a second time; a null charset lets jsoup detect it.
                        Document document = Jsoup.parse(response.getEntity().getContent(), null, chref);
                        cabstract = document.select("div[id=abstract]").text();
                        System.out.println("已获取摘要");
                        // The separating space keeps the last title word from fusing
                        // with the first abstract word.
                        ckeyword = keyword(strTostrArray(cname + " " + cabstract));
                    } else {
                        System.out.println(status);
                    }
                } finally {
                    // Always release the connection, even when parsing throws.
                    response.close();
                }

                paperDao.add(new Cvf(cname, chref, cabstract, ckeyword));
            }
        } finally {
            httpClient.close();
        }
    }

    /**
     * Splits text into lower-case word tokens.
     *
     * <p>The text is lower-cased independently of the default locale, every run
     * of non-word characters ({@code \W}) is replaced by a single space, and the
     * result is split on spaces. Digits and underscores count as word characters.
     *
     * @param str the text to tokenize; may be {@code null}
     * @return the tokens in order of appearance, never containing empty strings;
     *         an empty array when {@code str} is {@code null} or has no word characters
     */
    public static String[] strTostrArray(String str) {
        if (str == null) {
            return new String[0];
        }
        String normalized = NON_WORD.matcher(str.toLowerCase(Locale.ROOT)).replaceAll(" ").trim();
        if (normalized.isEmpty()) {
            // "".split(" ") would yield a single empty token.
            return new String[0];
        }
        return normalized.split(" ");
    }

    /**
     * Returns the most frequent word of {@code strs}, ignoring stop words.
     *
     * <p>{@code null} and empty tokens are skipped. When several words share the
     * highest count, the one that appears first in {@code strs} wins. The result
     * is also printed to standard output.
     *
     * @param strs the tokens to count; may be {@code null}
     * @return the most frequent non-stop-word, or {@code null} if there is none
     */
    public static String keyword(String[] strs) {
        // Insertion-ordered so that ties resolve to the first-seen word.
        Map<String, Integer> counts = new LinkedHashMap<String, Integer>();
        if (strs != null) {
            for (String s : strs) {
                if (s == null || s.isEmpty() || STOP_WORDS.contains(s)) {
                    continue;
                }
                Integer previous = counts.get(s);
                counts.put(s, previous == null ? 1 : previous + 1);
            }
        }

        String maxStr = null; // most frequent word so far
        int maxValue = 0;     // its number of occurrences
        for (Map.Entry<String, Integer> e : counts.entrySet()) {
            if (e.getValue() > maxValue) {
                maxValue = e.getValue();
                maxStr = e.getKey();
            }
        }
        System.out.println("出现最多的单词是:" + maxStr + "出现了" + maxValue + "次");
        return maxStr;
    }
}