zoukankan      html  css  js  c++  java
  • java词频统计——web版支持

    需求概要:

    1.把程序迁移到web平台,通过用户上传TXT的方式接收文件。

    2.用户直接输入要统计的文本,服务器返回结果

    3.在页面上给出链接 (如果有封皮、作者、字数、页数等信息更佳)或表格,展示经典英文小说词频统计结果;

    4.支持用户自定义单词分隔符;

    5.词汇范围对比(额外项)。

    分析和设计:

    1.创建web工程,利用servlet上传文件的技术实现用户向服务器上传文件。页面设置表单类型为enctype="multipart/form-data",创建文件上传文本框<input type="file" id="upfilename" name="upfilename" value="" />,服务器端使用Part p = request.getPart("upfilename");获取上传的文件,然后写入到指定地址即可。

    2.直接分析用户post到服务器的内容,为了使用原有的api,可以将输入内容写到文件中,再进行分析。

    3.页面展示统计结果

    4.用户可以输入自定义的分隔符和设置显示统计结果前10行(可修改)。需要修改原词频统计的有效字符函数。

    5.词汇范围对比属于额外项,本次暂不实现。

    部分代码实现:

    表单实现

     <!-- Upload form: posts a TXT file plus optional custom separators to the "upload" servlet. -->
     <div align="center" id="txtform">
         <form action="upload" method="post" enctype="multipart/form-data">
             <input type="file" id="upfilename" name="upfilename" accept=".txt,text/plain" /> 自定义分隔符<input
                 type="text" id="splitter" name="splitter"> <input
                 type="submit" id="submit" value="上传" />
         </form>
     </div>
     <!-- Direct-input form: posts the typed text, the row limit and optional separators to "wordcount".
          Element ids are unique within the page; the name attributes read by the server are unchanged. -->
     <div align="center" id="contentform">
         <form action="wordcount" method="post">
             <div align="center">待统计内容</div>
             <textarea name="content" id="content"
                 style="width: 700px; height: 200px;"></textarea>
             <br> 统计前<select id="num" name="num">
                 <option value="10">10</option>
                 <option value="20">20</option>
                 <!-- 0 is interpreted by the servlet as "no limit" -->
                 <option value="0">所有</option>
             </select><br>自定义分隔符<input type="text" id="contentSplitter" name="splitter"> <input
                 type="submit" value="提交" /> <input type="button"
                 onclick="if(confirm('确认重置?')){reset()}" value="重置">
         </form>
     </div>

    文件上传:

     1         request.setCharacterEncoding("UTF-8");
     2         response.setCharacterEncoding("UTF-8");
     3         response.setContentType("text/html");
     4         PrintWriter out = response.getWriter();
     5         byte b[] = new byte[2048];
     6         @SuppressWarnings("unused")
     7         int len = 0;
     8         Part p = request.getPart("upfilename");
     9         if(p==null){
    10             System.out.println("p == null");
    11         }
    12         String splitter = request.getParameter("splitter");
    13         InputStream in = p.getInputStream();
    14         String name = ""+System.currentTimeMillis();
    15         FileWriter fr = new FileWriter("D:\upload\" + name+".txt");
    16         while ((len = in.read(b)) > 0) {
    17             fr.write(new String(b));
    18         }
    19         fr.close();
    20         out.println("uploaded");
    21         response.sendRedirect("wordcount?id="+name+"&splitter"+splitter);
    22         out.flush();
    23         out.close();

    servlet处理:

     1     protected void doGet(HttpServletRequest request, HttpServletResponse response)
     2             throws ServletException, IOException {
     3         request.setCharacterEncoding("UTF-8");
     4         response.setCharacterEncoding("UTF-8");
     5         response.setContentType("text/html");
     6         PrintWriter out = response.getWriter();
     7         String id = request.getParameter("id");
     8         int num = 10;
     9         String filename = "D:\upload\" + id + ".txt";
    10         WordUtil wu = WordUtilFactory.getWordUtil();
    11         long start = System.currentTimeMillis();
    12         String splitter = request.getParameter("splitter");
    13         wu.setSplitter(splitter);
    14         List<String[]> result = wu.getSortedWordGroupCountBuffered(filename, splitter);
    15         int size = result.size();
    16         for (int i = 0; i < (size > num ? num == 0 ? size : num : size); i++) {
    17             String[] strs = result.get(i);
    18             out.println(strs[1] + "  : " + strs[0] + "<br>");
    19         }
    20         long end = System.currentTimeMillis();
    21         out.println("execution time :" + (end - start) + "ms");
    22         out.flush();
    23         out.close();
    24     }
    25 
    26     protected void doPost(HttpServletRequest request, HttpServletResponse response)
    27             throws ServletException, IOException {
    28         request.setCharacterEncoding("UTF-8");
    29         response.setCharacterEncoding("UTF-8");
    30         response.setContentType("text/html");
    31         PrintWriter out = response.getWriter();
    32         String content = request.getParameter("content");
    33         String numStr = request.getParameter("num");
    34         int num = 10;
    35         if (numStr != null) {
    36             num = Integer.parseInt(numStr);
    37         }
    38         WordUtil wu = WordUtilFactory.getWordUtil();
    39     
    40         long start = System.currentTimeMillis();
    41         String filename = "D://tmp.txt";
    42 
    43         FileWriter fr = new FileWriter(filename);
    44         fr.write(content);
    45         fr.close();
    46         String splitter = request.getParameter("splitter");
    47         wu.setSplitter(splitter);
    48         List<String[]> result = wu.getSortedWordGroupCountBuffered(filename, splitter);
    49         int size = result.size();
    50         for (int i = 0; i < (size > num ? num == 0 ? size : num : size); i++) {
    51             String[] strs = result.get(i);
    52             out.println(strs[1] + "  : " + strs[0] + "<br>");
    53         }
    54         long end = System.currentTimeMillis();
    55         out.println("execution time :" + (end - start) + "ms");
    56         out.flush();
    57         out.close();
    58     }

    有效字符判定(即自定义分隔符)

     1     public void setSplitter(String splitter) {
     2         char[] tmp = splitter.toCharArray();
     3         ArrayList<Character> deleted = new ArrayList<>();
     4         for(int i=0;i<tmp.length-1;i++){
     5             if(tmp[i]=='\'){
     6                 char c = tmp[i+1];
     7                 if(c=='n'){
     8                     deleted.add('
    ');
     9                 }
    10                 if(c=='r'){
    11                     deleted.add('
    ');
    12                 }
    13                 if(c=='t'){
    14                     deleted.add('
    ');
    15                 }
    16                 char[] copy = new char[tmp.length-2];
    17                 for(int j = 0;j <i;j++){
    18                     copy[j]=tmp[j];
    19                 }
    20                 for(int j=i;j<tmp.length-2;j++){
    21                     copy[j]=tmp[j+2];
    22                 }
    23                 i++;
    24             }
    25         }
    26         split = new char[tmp.length+deleted.size()];
    27         for(int i = 0;i<tmp.length;i++){
    28             split[i]=tmp[i];
    29         }
    30         for(int i=tmp.length;i<split.length;i++){
    31             split[i]=deleted.get(split.length-tmp.length-1);
    32         }
    33     }
    34 
    35     private int isCharacter(char ch, String splitter) {
    36         if (split == null) {
    37             if ((ch >= 'a' && ch <= 'z'))
    38                 return 1;
    39             if ((ch >= 'A' && ch <= 'Z'))
    40                 return 1;
    41             if (ch >= '0' && ch <= '9')
    42                 return 2;
    43             return 0;
    44         }
    45         if (split.equals("")) {
    46             if ((ch >= 'a' && ch <= 'z'))
    47                 return 1;
    48             if ((ch >= 'A' && ch <= 'Z'))
    49                 return 1;
    50             if (ch >= '0' && ch <= '9')
    51                 return 2;
    52             return 0;
    53         }
    54         for (int i = 0; i < split.length; i++) {
    55             if (ch == split[i]) {
    56                 return 0;
    57             }
    58         }
    59         if ((ch >= 'a' && ch <= 'z'))
    60             return 1;
    61         if ((ch >= 'A' && ch <= 'Z'))
    62             return 1;
    63         if (ch >= '0' && ch <= '9')
    64             return 2;
    65         return 1;
    66     }

     运行截图:

    web版工程地址:https://git.coding.net/jx8zjs/wordcount-web.git

    ssh:  git@git.coding.net:jx8zjs/wordcount-web.git

    console版工程地址:https://coding.net/u/jx8zjs/p/wordCount/git

    ssh:  git@git.coding.net:jx8zjs/wordCount.git

  • 相关阅读:
    [CF920E] Connected Components?
    [CF981E] Addition on Segments
    [BZOJ2152] 聪聪可可
    [CF1355E] Restorer Distance
    [CF1101D] GCD Counting
    [CF827C] DNA Evolution
    [HNOI2008] 明明的烦恼
    [CF712D] Memory and Scores
    [CF609E] Minimum spanning tree for each edge
    后缀自动机应用小结 I
  • 原文地址:https://www.cnblogs.com/jx8zjs/p/5955009.html
Copyright © 2011-2022 走看看