zoukankan      html  css  js  c++  java
  • 预会热词统计

    一、要求:

    1、完成论文的题目、摘要、关键词、原文链接四项内容爬取;

    2、存储到本地数据库中;

    3、按照题目、关键词分类统计得到最热的十个领域方向;

    4、热词出现的次数越多,在热词云中显示得就越大;还要将热词与文章关联起来,点击热词云中的热词可以找到与之对应的文章题目;

    二、效果

    三、设计思路:

    1、爬取、存取数据:

    2、使用 ECharts 的 wordCloud 扩展(echarts-wordcloud)实现热词云。

    <%@ page language="java" contentType="text/html; charset=UTF-8"
        pageEncoding="UTF-8"%>
    <%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
    <%--
      Hot-word page: a word-cloud container (#main) followed by a table of paper links.
      The table is filled from the "list" attribute; each item exposes "lianjie" (URL)
      and "title".
    --%>
    <!DOCTYPE html>
    <html>
    <head>
    <meta charset="UTF-8">
    <title>Insert title here</title>
    <link rel="stylesheet" href="css/bootstrap.min.css" type="text/css" />
    <script src="js/jquery-1.11.3.min.js" type="text/javascript"></script>
    <script type="text/javascript" src="js/echarts.min.js"></script>
    <script type="text/javascript" src="js/china.js"></script>
    <script src="js/bootstrap.min.js" type="text/javascript"></script>
    <script src='https://cdn.bootcss.com/echarts/3.7.0/echarts.simple.js'></script>
    <script src='js/echarts-wordcloud.js'></script>
    </head>
    <body>
    <%-- The chart container needs an explicit size, otherwise ECharts has nothing to draw into. --%>
    <div id="main" style="width: 100%;height: 400px"></div>
    <div>
      <table class="table" style="width: 100%;align-content: center;" >
        <tr>
          <th align="center">论文连接</th>
        </tr>
        <c:forEach var="item" items="${list}">
          <tr>
            <%-- c:out escapes the stored values so they cannot inject markup into the page. --%>
            <td><a href="<c:out value='${item.lianjie}'/>"><c:out value="${item.title}"/></a></td>
          </tr>
        </c:forEach>
      </table>
    </div>
    <script>
      var chart = echarts.init(document.getElementById('main'));
      var dt;
      $.ajax({
        url : "PaperServlet_",
        async : false,
        type : "POST",
        success : function(data) {
          dt = data;
         // alert(dt[0].title);},
        error : function() {
          alert("请求失败");},
        dataType : "json"
      });
      var mydata = new Array(0);
      for (var i = 0; i < dt.length; i++) {
          var d = {};
          
          d["name"] = dt[i].name;
          //alert(dt[i].name);
          d["value"] = dt[i].value;
          mydata.push(d);}
      var option = {
        tooltip: {},
        series: [ {
          type: 'wordCloud',
          gridSize: 2,
          sizeRange: [20, 50],
          rotationRange: [-90, 90],
          shape: 'pentagon',
           600,
          height: 300,
          drawOutOfBound: true,
          textStyle: {
            normal: {
              color: function () {
                return 'rgb(' + [
                  Math.round(Math.random() * 160),
                  Math.round(Math.random() * 160),
                  Math.round(Math.random() * 160)
                ].join(',') + ')';}},
            emphasis: {
              shadowBlur: 10,
              shadowColor: '#333' } },
          data: mydata} ]};
    
      chart.setOption(option);
      chart.on('click', function (params) {
          var url = "ClickServlet?geunjian=" + params.name;
          window.location.href = url; });
      window.onresize = chart.resize;
    </script>
    </body>
    </html>
    View Code

    3、将关键字分割成单词然后对单词进行去重、计数和排序,装到list,转换为json字符串传递给界面(ajax请求获取的数据)

    package com.me.servlet;
    
    import java.io.IOException;
    import java.sql.SQLException;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Comparator;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;
    
    import javax.servlet.ServletException;
    import javax.servlet.annotation.WebServlet;
    import javax.servlet.http.HttpServlet;
    import javax.servlet.http.HttpServletRequest;
    import javax.servlet.http.HttpServletResponse;
    
    import com.google.gson.Gson;
    import com.me.dao.LWDao;
    import com.me.domain.LunWen;
    import com.me.domain.Tu;
    
    @WebServlet("/PaperServlet_")
    public class PaperServlet_ extends HttpServlet {
        private static final long serialVersionUID = 1L;
        public PaperServlet_() {
            super();}
    
        protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
            response.setHeader("content-type", "text/html;charset=UTF-8");
            response.setCharacterEncoding("UTF-8");
            LWDao dao = new LWDao();
            List<LunWen> list = new ArrayList<LunWen>();
            List<Tu> list_tu = new ArrayList<Tu>();
            String [] str = new String[10000];
            String [] str_ = new String[10000];
            int [] b = new int[10000];
            int num = 0;
            int length1 = 0;
            try {
                list = dao.search_();} 
            catch (SQLException e) {
                e.printStackTrace();}
            for(int i=0;i<list.size();i++) {
                if(list.get(i).getLianjie()!=null) {
                    String ss = list.get(i).getLianjie().substring(6,list.get(i).getLianjie().length());
                    list.get(i).setLianjie("http://openaccess.thecvf.com/"+ss);}
                String[] split = list.get(i).getGuanjian().split(" ");
                for(int j=0;j<split.length;j++) {
                    str[num++] = split[j];}}
            for(int k=0;k<num;k++) {
                b[k]=0;}
            str_[0]=str[0];
            int tt=1;
            Boolean rt=true;
            for(int i=1;i<num;i++) {
                rt=false;
                for(int j=0;j<tt;j++) {
                    if(str[i].equals(str_[j])) {
                        rt=true;
                        break;}}
                if(!rt) {
                    str_[tt]=str[i];
                    tt++;}}
            length1=tt;
            for(int i=0;i<length1;i++) {
                for(int j=0;j<num;j++) {
                    if(str_[i].equals(str[j])) {
                        b[i]++;}}}
            int t3=0;
            int t2=0;
            String sr="";
            for(int i=0;i<length1-1;i++) {
                t3=i;
                for(int j=i+1;j<length1;j++) {
                    if(b[t3]<b[j]) {
                        t3=j;}}
               if(t3!=i) {
                   t2=b[i];
                   b[i]=b[t3];
                   b[t3]=t2;
                   sr=str_[i];
                   str_[i]=str_[t3];
                   str_[t3]=sr;}}
            for(int i=0;i<100;i++) {
                Tu tu = new Tu();
                tu.name=str_[i];
                tu.value= b[i];
                list_tu.add(tu);}
            
            Gson gson = new Gson();
            String json = gson.toJson(list_tu);
            response.getWriter().write(json);}
        
    
        protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
            // TODO Auto-generated method stub
            doGet(request, response);}}
    View Code

    1)热词实体

    2)将关键字分割成单词然后对单词进行去重、计数和排序,装到list

    4、论文链接列表数据准备(PaperServlet 是最初访问的地方,携带数据跳转到 jsp 界面)

    1)论文实体

    5、点击热词后携带此热词到servlet,再从数据库中找出论文的关键字中包含此热词的论文列表

    6、dao层

  • 相关阅读:
    观察者模式股票提醒
    中介者模式虚拟聊天室
    模板方法模式数据库的连接
    职责链模式财务审批
    期末总结
    软件需求分析考试
    tomcat启动极其慢的解决方法困扰我一年多的问题终于解决
    状态模式银行账户
    解释器模式
    动态加载JS文件提升访问网站速度
  • 原文地址:https://www.cnblogs.com/dg1137/p/13085018.html
Copyright © 2011-2022 走看看