zoukankan      html  css  js  c++  java
  • 单词统计功能续

    短语统计

    我们在处理文本的时候,只需将提取出来的文本数据进行特殊的分割处理,比如只需将英文的“,”,“.”,“?”,“!”以及回车符设为分隔符,并将一些无用单词作为间断,比如

    "a",  "it", "the", "and", "this"等。

    package analyse_word;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.util.HashMap;
    import java.util.Iterator;
    import java.util.Scanner;
    import java.util.Set;
    /**
     * Phrase extraction for the word-statistics exercise.
     *
     * <p>Reads an English text file, cuts every line into phrases at punctuation marks, and prints
     * the distinct phrases. Each phrase is passed through {@link #useless(String)}, which consults
     * a separate word-list file.
     */
    public class recognize_sentence {

        /** Path of the word-list file read by {@link #useless(String)}. */
        private static final String USELESS_WORDS_PATH = "D:\\useless.txt";

        /** Path of the text file read by {@link #recognize()}. */
        private static final String TEXT_PATH = "D:\\Englis_letters.txt";

        /** Delimiter used when reading the word-list file. */
        private static final String USELESS_DELIMITERS = "[\n]";

        /** Characters that end a phrase: tab, plus, semicolon, period, comma, quotes, '?', '!', newline. */
        private static final String PHRASE_DELIMITERS = "[\t+;.,“”‘’?!\n+]";

        /**
         * Checks {@code str} against the entries of the word-list file.
         *
         * <p>Prints a message and returns {@code false} when the word-list file does not exist.
         * In every other case the method returns {@code true}.
         *
         * @param str the phrase to check
         * @return {@code false} if the word-list file is missing, {@code true} otherwise
         * @throws FileNotFoundException if the word-list file exists but cannot be opened
         */
        public static boolean useless(String str) throws FileNotFoundException {
            File file = new File(USELESS_WORDS_PATH);
            if (!file.exists()) {
                System.out.println("文件不存在");
                return false;
            }

            HashMap<String, Integer> entries = countTokens(file, USELESS_DELIMITERS);
            for (String word : entries.keySet()) {
                if (isSkippedEntry(word)) {
                    continue;
                }
                // A match means the phrase begins with the entry surrounded by single spaces.
                if (str.startsWith(" " + word + " ")) {
                    return true;
                }
            }
            // NOTE(review): the fall-through result is also true, exactly as in the original code,
            // so the prefix check above never changes the outcome. This probably should be
            // "return false" - confirm the intended filtering before changing it, because
            // recognize() prints every phrase for which this method returns true.
            return true;
        }

        /**
         * Prints every distinct phrase of the text file for which {@link #useless(String)} returns
         * {@code true}. Prints a message and returns when the text file does not exist.
         *
         * @throws FileNotFoundException if an existing input file cannot be opened
         */
        public static void recognize() throws FileNotFoundException {
            File file = new File(TEXT_PATH);
            if (!file.exists()) {
                System.out.println("文件不存在");
                return;
            }

            HashMap<String, Integer> phrases = countTokens(file, PHRASE_DELIMITERS);
            for (String phrase : phrases.keySet()) {
                if (useless(phrase)) {
                    System.out.println(phrase);
                }
            }
        }

        public static void main(String[] args) throws FileNotFoundException {
            recognize();
        }

        /** Returns true for word-list entries that are never compared: "", "a", "the" and two spaces. */
        private static boolean isSkippedEntry(String word) {
            return word == null
                    || "".equals(word)
                    || "a".equals(word)
                    || "the".equals(word)
                    || "  ".equals(word);
        }

        /**
         * Reads {@code file} line by line, splits each line on {@code delimiterRegex}, and counts
         * how often each resulting token occurs. The scanner is closed before returning.
         */
        private static HashMap<String, Integer> countTokens(File file, String delimiterRegex)
                throws FileNotFoundException {
            HashMap<String, Integer> counts = new HashMap<String, Integer>();
            try (Scanner scanner = new Scanner(file)) {
                while (scanner.hasNextLine()) {
                    for (String token : scanner.nextLine().split(delimiterRegex)) {
                        Integer seen = counts.get(token);
                        counts.put(token, seen == null ? 1 : seen + 1);
                    }
                }
            }
            return counts;
        }
    }
  • 相关阅读:
    2019年江苏大学885编程大题
    2018年江苏大学885编程题
    python-类和对象
    unity游戏框架学习-登录模块
    unity游戏框架学习-AssetBundle
    记 Firebase Crashlytics 接入遇到的坑
    c# 枚举Enum
    unity性能优化-UGUI
    unity性能优化-GPU
    unity性能优化-CPU
  • 原文地址:https://www.cnblogs.com/adret/p/11070468.html
Copyright © 2011-2022 走看看