zoukankan      html  css  js  c++  java
  • 4.17号自学成果

    热词统计cvpr2019

    <%@ page language="java" contentType="text/html; charset=UTF-8"
        pageEncoding="UTF-8"%>
    <%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
    <!DOCTYPE html>
    <html>
    <head>
    <meta charset="UTF-8">
    <title>Insert title here</title>
    <link rel="stylesheet" href="css/bootstrap.min.css" type="text/css" />
    <script src="jquery-3.4.1.js" type="text/javascript"></script>
    <script type="text/javascript" src="echarts.js"></script>
    <script type="text/javascript" src="js/china.js"></script>
    <script src="js/bootstrap.min.js" type="text/javascript"></script>
    <script src='https://cdn.bootcss.com/echarts/3.7.0/echarts.simple.js'></script>
    <script src='js/echarts-wordcloud.js'></script>
    </head>
    <body>
    <%-- Container the word-cloud chart is rendered into (see the script below). --%>
    <div id="main" style="width: 100%;height: 400px"></div>
    <%-- Paper list: one row per entry of the "list" request attribute.
         Both the link and the title are written through c:out so that markup
         contained in the stored values is escaped instead of being injected
         into the page. --%>
    <div>
      <table class="table" style="width: 100%;align-content: center;" >
        <tr>
          <th align="center">论文连接</th>
        </tr>
        <c:forEach var="item" items="${list}">
          <tr>
            <td><a href="<c:out value='${item.lianjie}'/>"><c:out value="${item.title}"/></a></td>
          </tr>
        </c:forEach>
      </table>
    </div>
    <script>
      // Word cloud of keywords: the data is requested from PaperServlet_ as JSON
      // (an array of objects with "name" and "value") and drawn into #main.
      var chart = echarts.init(document.getElementById('main'));

      /**
       * Builds the wordCloud series from the servlet response and applies it.
       * Does nothing when the response is missing or not an array, so a bad
       * reply no longer raises a TypeError.
       */
      function renderWordCloud(words) {
        if (!words || typeof words.length !== 'number') {
          return;
        }
        var mydata = [];
        for (var i = 0; i < words.length; i++) {
          mydata.push({
            name: words[i].name,
            value: words[i].value
          });
        }
        var option = {
          tooltip: {},
          series: [ {
            type: 'wordCloud',
            gridSize: 2,
            sizeRange: [20, 50],
            rotationRange: [-90, 90],
            shape: 'pentagon',
            width: 600,
            height: 300,
            drawOutOfBound: true,
            textStyle: {
              normal: {
                // Random dark-ish colour per word (each channel in 0..160).
                color: function () {
                  return 'rgb(' + [
                    Math.round(Math.random() * 160),
                    Math.round(Math.random() * 160),
                    Math.round(Math.random() * 160)
                  ].join(',') + ')';
                }
              },
              emphasis: {
                shadowBlur: 10,
                shadowColor: '#333'
              }
            },
            data: mydata
          } ]
        };
        chart.setOption(option);
      }

      // Asynchronous request: the chart is rendered from the success callback
      // instead of blocking the page with a synchronous XHR.
      $.ajax({
        url : "PaperServlet_",
        type : "POST",
        dataType : "json",
        success : function(data) {
          renderWordCloud(data);
        },
        error : function() {
          alert("请求失败");
        }
      });

      // Clicking a word navigates to ClickServlet with the word as parameter;
      // the word is URL-encoded so characters such as '&' or '#' survive.
      chart.on('click', function (params) {
        var url = "ClickServlet?geunjian=" + encodeURIComponent(params.name);
        window.location.href = url;
      });

      // Keep the chart sized to its container when the window is resized.
      window.onresize = function () {
        chart.resize();
      };
    </script>
    </body>
    </html>
    package utils;
    
    import java.io.IOException;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.Set;
    import java.util.HashMap;  
    import java.util.Iterator;  
    import java.util.Map;
    
    import org.apache.http.client.config.RequestConfig;
    import org.apache.http.client.methods.CloseableHttpResponse;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.client.utils.URIBuilder;
    import org.apache.http.impl.client.CloseableHttpClient;
    import org.apache.http.impl.client.HttpClients;
    import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Attributes;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    
    import dao.dao;
    import entity.Cvf;
    
    
    
    /**
     * 这是使用Jsoup解析
     * @author 张志伟
     *
     */
    public class Jsouputil {
    
        /**
         * 使用Selector选择器获取元素
         */
        public static void testSelector()throws Exception{
            //获取Document对象
            HttpClientPool httpClientPool =new HttpClientPool();
            //创建连接池管理器
            PoolingHttpClientConnectionManager cm =new  PoolingHttpClientConnectionManager();
            //获取网页HTML字符串
            String content=httpClientPool.doGet(cm);
                            
            //解析字符串
            Document doc = Jsoup.parse(content);
    //        System.out.println(doc.toString());
        
            //[attr=value],利用属性获取
            Elements elements = doc.select("div[id=content]").select("dl").select("dt[class=ptitle]");
            System.out.println(elements.toString());
            
            Cvf cvf=new Cvf();
            dao dao=new dao();
            if(elements!=null) 
            {
            for(Element ele:elements)
            {
                String href="http://openaccess.thecvf.com/";
                String cname=ele.select("a").text();
                System.out.println(cname);
                String href2=ele.select("a").attr("href");
                String chref=href.concat(href2);
                System.out.println(chref);
                String cabstract =null;
                String ckeyword  =null;
                    //获取title的内容
                CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();
                    //创建URIBuilder
                      URIBuilder uribuilder= new URIBuilder(chref);
                      HttpGet httpGet= new HttpGet(uribuilder.build());
                      RequestConfig config = RequestConfig.custom().setConnectTimeout(10000*10000)//设置创建连接的最长时间,单位为毫秒
                              .setConnectionRequestTimeout(10000*10000)//设置获取连接的最长时间,单位为毫秒
                              .setSocketTimeout(100000*1000000)//设置传输数据的最长时间,单位为毫秒
                              .build();
                              //给请求设置请求信息
                              httpGet.setConfig(config);
                              CloseableHttpResponse response=null;
                              response = httpClient.execute(httpGet);
                      //解析响应,获取数据
                      //判断状态码是否为两百
                      if(response.getStatusLine().getStatusCode()==200||response.getStatusLine().getStatusCode()==302) {
                          Document document = Jsoup.parse(new URL(chref), 100000);
                          cabstract = document.select("div[id=abstract]").text();
                          System.out.println("已获取摘要");
                        String[] strs = strTostrArray(cname+cabstract);
                          
                         ckeyword=keyword(strs);
                         
                      }
                      else {
                          System.out.println(response.getStatusLine().getStatusCode());
                          cabstract =null;
                          ckeyword=null;
                      }
                      if(response!=null) {
                          //关闭response 
                          response.close();
                      }
                      
                cvf=new Cvf(cname,chref,cabstract,ckeyword);
                dao.add(cvf);
    
            }
            }
            
        }
        public static String[] strTostrArray(String str) {
              /*
               * 将非字母字符全部替换为空格字符" " 得到一个全小写的纯字母字符串包含有空格字符
               */
              str = str.toLowerCase();// 将字符串中的英文部分的字符全部变为小写
              String regex = "[\W]+";// 非字母的正则表达式 --W:表示任意一个非单词字符
              str = str.replaceAll(regex, " ");
              String[] strs = str.split(" "); // 以空格作为分隔符获得字符串数组
              return strs;
             }
             public static String keyword(String[] strs) {
              /*
               * 建立字符串(String)出现次数(Integer)的映射
               */
              HashMap<String, Integer> strhash = new HashMap<String, Integer>();
              Integer in = null;// 用于存放put操作的返回值
              for (String s : strs) {// 遍历数组 strs
    
                      in = strhash.put(s, 1);
                      if (in != null) {// 判断如果返回的不是null,则+1再放进去就是出现的次数
                          strhash.put(s, in + 1);
                      }
    
              }
              Set<java.util.Map.Entry<String, Integer>> entrySet = strhash.entrySet();
              String maxStr = null;// 用于存放出现最多的单词
              int maxValue = 0;// 用于存放出现最多的次数
              for (java.util.Map.Entry<String, Integer> e : entrySet) {
               String key = e.getKey();
               Integer value = e.getValue();
               if(key.equals("a")||key.equals("the")||key.equals("to")||key.equals("and")||key.equals("in")||key.equals("of")||key.equals("our")||key.equals("your")||key.equals("we")||key.equals("is")||key.equals("on")||key.equals("for")||key.equals("that")||key.equals("an")||key.equals("are")) {
                   value=0;
               }
               if (value > maxValue) {
                maxValue = value;// 这里有自动拆装箱
                maxStr = key;
               }
              }
              System.out.println("出现最多的单词是:" + maxStr + "出现了" + maxValue + "");
              return maxStr;
             }
    }
  • 相关阅读:
    关于DOS的常用操作
    <leetcode 第188场周赛>
    大挑战!状压dp!
    41. 缺失的第一个正数
    1095. 山脉数组中查找目标值 (二分查找)
    “人活着就是为了贪心”——贪心算法日
    二分查找
    2020.4.25 leetcode 编程战队赛
    <leetcode c++>221. 最大正方形
    <leetcode c++>面试题51. 数组中的逆序对
  • 原文地址:https://www.cnblogs.com/sunhongbin/p/12780566.html
Copyright © 2011-2022 走看看