zoukankan      html  css  js  c++  java
  • 单词统计

    1. 统计字母出现的频率(不区分大小写)

    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.io.IOException;
    import java.text.DecimalFormat;
    import java.text.NumberFormat;
    
    /**
     * Prints, for every English letter, how many times it occurs in a text file
     * and what share of all letters it represents. Upper- and lower-case forms
     * of a letter are counted together.
     */
    public class a {

        /**
         * Reads {@code f:/piao.txt} and prints one {@code letter=count(share),}
         * entry per letter, from a to z, all on a single line.
         *
         * @param args ignored
         * @throws IOException if the file cannot be opened or read
         */
        public static void main(String[] args) throws IOException {
            StringBuilder sb = new StringBuilder();
            // try-with-resources closes the reader even when readLine() throws.
            try (BufferedReader br = new BufferedReader(new FileReader("f:/piao.txt"))) {
                String line;
                while ((line = br.readLine()) != null) {
                    sb.append(line);
                }
            }

            int[] counts = countLetters(sb.toString());
            int totalCount = 0;
            for (int count : counts) {
                totalCount += count;
            }

            for (int i = 0; i < counts.length; i++) {
                char letter = (char) ('a' + i);
                System.out.print(letter + "=" + counts[i] + "(" + proportion(counts[i], totalCount) + "),");
            }
        }

        /**
         * Counts the ASCII letters in {@code text}, ignoring case.
         *
         * <p>The count is done on characters rather than on the bytes of the
         * platform encoding: in a multi-byte encoding the trailing bytes of a
         * non-ASCII character can fall into the ASCII letter range and would be
         * miscounted as letters.
         *
         * @param text the text to scan
         * @return a 26-element array; index 0 holds the count for 'a'/'A',
         *         index 25 the count for 'z'/'Z'
         */
        static int[] countLetters(String text) {
            int[] counts = new int[26];
            for (int i = 0; i < text.length(); i++) {
                char ch = text.charAt(i);
                if (ch >= 'a' && ch <= 'z') {
                    counts[ch - 'a']++;
                } else if (ch >= 'A' && ch <= 'Z') {
                    counts[ch - 'A']++;
                }
            }
            return counts;
        }

        /**
         * Returns {@code count / total} rounded to two decimal places.
         *
         * @param count occurrences of one letter
         * @param total occurrences of all letters
         * @return the rounded share, or {@code 0.0} when {@code total} is zero
         */
        static double proportion(int count, int total) {
            if (total == 0) {
                // No letters at all: avoid dividing by zero.
                return 0.0;
            }
            // Divide in floating point so the value is rounded, not truncated.
            return Math.round(count * 100.0 / total) / 100.0;
        }
    }

    截图:

    2. 统计单词出现的次数

    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.util.*;
    
    
    /**
     * Counts how often each word occurs in a text file, ignoring case, and
     * prints the words from most to least frequent.
     */
    public class b {

        /**
         * Reads {@code f:/飘c1.txt} and prints one {@code word:count} line per
         * distinct word, highest count first. The order among words with the
         * same count is unspecified.
         *
         * @param args ignored
         * @throws Exception if the file cannot be opened or read
         */
        public static void main(String[] args) throws Exception {
            StringBuilder sb = new StringBuilder();
            // try-with-resources closes the reader even when readLine() throws.
            try (BufferedReader br = new BufferedReader(new FileReader("f:/飘c1.txt"))) {
                String text;
                while ((text = br.readLine()) != null) {
                    // readLine() strips the line terminator; put a separator back so
                    // the last word of one line is not fused with the first of the next.
                    sb.append(text).append('\n');
                }
            }

            Map<String, Integer> map = countWords(sb.toString());

            List<Map.Entry<String, Integer>> list =
                    new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
            // Sort by count, highest first.
            Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
                @Override
                public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                    return right.getValue().compareTo(left.getValue());
                }
            });

            for (Map.Entry<String, Integer> entry : list) {
                System.out.println(entry.getKey() + ":" + entry.getValue());
            }
        }

        /**
         * Splits {@code text} into words and counts each one. The text is
         * lower-cased first, and a word is a maximal run of the letters a-z;
         * every other character, including parentheses, is a separator.
         *
         * @param text the text to analyse
         * @return a map from lower-case word to its number of occurrences;
         *         empty when the text contains no letters
         */
        static Map<String, Integer> countWords(String text) {
            Map<String, Integer> counts = new HashMap<String, Integer>();
            // Locale.ROOT keeps the lower-casing independent of the default locale.
            for (String word : text.toLowerCase(Locale.ROOT).split("[^a-z]+")) {
                if (word.isEmpty()) {
                    // split() yields a leading "" when the text starts with a separator.
                    continue;
                }
                Integer seen = counts.get(word);
                counts.put(word, seen == null ? 1 : seen + 1);
            }
            return counts;
        }
    }
    

      截图

    3. 去除无用词(停用词)

    /**
     * Ranks the words of a text file by how often they occur, leaving out blank
     * tokens and the stop words "a" and "the", and prints as many of the most
     * frequent words as the user asks for.
     */
    public class English_word {

        /**
         * Regular-expression character class of the single characters that
         * separate words: whitespace, '+', tab, curly quotes, parentheses,
         * ';', ',', '.', '?', '!' and newline.
         */
        private static final String SEPARATORS = "[\\s+\t”“();,.?!\n]";

        /**
         * Reads {@code D:\Englis_letter.txt}, prints every word that is kept,
         * then reads a number from standard input and prints that many of the
         * most frequent words together with their counts.
         *
         * @param args ignored
         * @throws FileNotFoundException if the file disappears between the
         *         existence check and opening it
         */
        public static void main(String[] args) throws FileNotFoundException {
            File file = new File("D:\\Englis_letter.txt");
            if (!file.exists()) {
                System.out.println("文件不存在");
                return;
            }

            HashMap<String, Integer> hashMap = new HashMap<String, Integer>();
            // try-with-resources releases the file handle once reading is done.
            try (Scanner x = new Scanner(file)) {
                while (x.hasNextLine()) {
                    countLine(x.nextLine(), hashMap);
                }
            }

            // Size the arrays from the data so they cannot overflow.
            String[] words = new String[hashMap.size()];
            int[] out_words = new int[hashMap.size()];
            int kept = collect(hashMap, words, out_words);
            for (int j = 0; j < kept; j++) {
                System.out.println(words[j]);
            }
            sortDescending(words, out_words, kept);

            Scanner scan = new Scanner(System.in);
            // Never print more rows than there are words.
            int ms = Math.min(scan.nextInt(), kept);
            for (int j = 0; j < ms; j++) {
                System.out.println(words[j] + " 出现次数:" + out_words[j]);
            }
        }

        /**
         * Splits one line at every separator character and adds each token to
         * {@code counts}. Adjacent separators produce empty tokens; they are
         * counted here and discarded later by {@link #collect}.
         *
         * @param line   one line of text
         * @param counts token-to-count map, updated in place
         */
        static void countLine(String line, HashMap<String, Integer> counts) {
            for (String token : line.split(SEPARATORS)) {
                Integer seen = counts.get(token);
                counts.put(token, seen == null ? 1 : seen + 1);
            }
        }

        /**
         * Copies every word that is not ignored, with its count, into the two
         * parallel arrays.
         *
         * @param counts token-to-count map
         * @param words  receives the kept words; needs at least {@code counts.size()} slots
         * @param freq   receives the matching counts; same length requirement
         * @return the number of slots filled
         */
        static int collect(HashMap<String, Integer> counts, String[] words, int[] freq) {
            int kept = 0;
            for (String word : counts.keySet()) {
                if (isIgnored(word)) {
                    continue;
                }
                words[kept] = word;
                freq[kept] = counts.get(word);
                kept++;
            }
            return kept;
        }

        /**
         * Sorts the first {@code size} entries of the parallel arrays by count,
         * highest first.
         *
         * @param words words, reordered together with {@code freq}
         * @param freq  counts, sorted in descending order
         * @param size  number of filled slots
         */
        static void sortDescending(String[] words, int[] freq, int size) {
            for (int j = 0; j < size; j++) {
                for (int k = j + 1; k < size; k++) {
                    if (freq[k] > freq[j]) {
                        int count = freq[j];
                        String word = words[j];
                        freq[j] = freq[k];
                        words[j] = words[k];
                        freq[k] = count;
                        words[k] = word;
                    }
                }
            }
        }

        /** Tells whether a token is blank or one of the stop words "a"/"the" in any letter case. */
        private static boolean isIgnored(String word) {
            return word == null
                    || word.trim().isEmpty()
                    || word.equalsIgnoreCase("a")
                    || word.equalsIgnoreCase("the");
        }
    }
    

      

    4. 遍历目录中的文件并统计单词

    /**
     * Counts the words of every {@code .txt} file directly inside a directory and
     * prints as many of the most frequent words as the user asks for, leaving out
     * blank tokens and the stop words "a" and "the".
     */
    public class test {

        /** Words kept for ranking; slots {@code 0..i-1} are filled. */
        static String words[] = new String[100000];
        /** Counts matching {@link #words}, slot for slot. */
        static int out_words[] = new int[100000];
        /** Number of filled slots in {@link #words} and {@link #out_words}. */
        static int i = 0;
        /** Token-to-count map accumulated over all files read so far. */
        static HashMap<String, Integer> hashMap = new HashMap<String, Integer>();

        /**
         * Regular-expression character class of the single characters that
         * separate words: whitespace, '+', tab, curly quotes, parentheses,
         * ';', ',', '.', '?', '!' and newline.
         */
        private static final String SEPARATORS = "[\\s+\t”“();,.?!\n]";

        /**
         * Adds the tokens of one file to {@link #hashMap}. Prints a message and
         * returns without changes when the file does not exist. Adjacent
         * separators produce empty tokens, which are counted here and discarded
         * when the ranking is built.
         *
         * @param ms the file to read
         * @throws FileNotFoundException if the file exists but cannot be opened
         */
        public static void English_words(File ms) throws FileNotFoundException {
            if (!ms.exists()) {
                System.out.println("文件不存在");
                return;
            }
            // try-with-resources releases the file handle once reading is done.
            try (Scanner x = new Scanner(ms)) {
                while (x.hasNextLine()) {
                    for (String token : x.nextLine().split(SEPARATORS)) {
                        Integer seen = hashMap.get(token);
                        hashMap.put(token, seen == null ? 1 : seen + 1);
                    }
                }
            }
        }

        /**
         * Scans {@code d:/} for text files, counts their words, then reads a
         * number from standard input and prints that many of the most frequent
         * words together with their counts.
         *
         * @param args ignored
         * @throws FileNotFoundException if a listed file cannot be opened
         */
        public static void main(String[] args) throws FileNotFoundException {
            String path = "d:/";
            File[] tempList = new File(path).listFiles();
            if (tempList == null) {
                // listFiles() returns null when the path is not a readable directory.
                System.out.println("文件不存在");
                return;
            }
            for (File candidate : tempList) {
                // Only regular files with a real ".txt" extension.
                if (candidate.isFile() && candidate.getName().endsWith(".txt")) {
                    System.out.println("文     件:" + candidate);
                    English_words(candidate);
                }
            }

            int kept = rank();

            Scanner scan = new Scanner(System.in);
            // Never print more rows than there are words.
            int ms = Math.min(scan.nextInt(), kept);
            for (int j = 0; j < ms; j++) {
                System.out.println(words[j] + " 出现次数:" + out_words[j]);
            }
        }

        /**
         * Rebuilds {@link #words} and {@link #out_words} from {@link #hashMap}:
         * drops ignored tokens and sorts the rest by count, highest first.
         *
         * @return the number of ranked words, which is also stored in {@link #i}
         */
        static int rank() {
            // Grow the arrays when there are more distinct tokens than slots.
            if (hashMap.size() > words.length) {
                words = new String[hashMap.size()];
                out_words = new int[hashMap.size()];
            }
            i = 0;
            for (String word : hashMap.keySet()) {
                if (isIgnored(word)) {
                    continue;
                }
                words[i] = word;
                out_words[i] = hashMap.get(word);
                i++;
            }
            // Exchange sort limited to the filled slots.
            for (int j = 0; j < i; j++) {
                for (int k = j + 1; k < i; k++) {
                    if (out_words[k] > out_words[j]) {
                        int count = out_words[j];
                        String word = words[j];
                        out_words[j] = out_words[k];
                        words[j] = words[k];
                        out_words[k] = count;
                        words[k] = word;
                    }
                }
            }
            return i;
        }

        /** Tells whether a token is blank or one of the stop words "a"/"the" in any letter case. */
        private static boolean isIgnored(String word) {
            return word == null
                    || word.trim().isEmpty()
                    || word.equalsIgnoreCase("a")
                    || word.equalsIgnoreCase("the");
        }
    }
  • 相关阅读:
    密钥学习
    MAP的计算方法(简单总结)
    模型量化技术(入门级理解,不涉及复杂公式和深入的原理)
    实现java非阻塞http请求的两种方式
    PIP安装软件报错:“ERROR: Could not install packages due to an EnvironmentError: HTTPSConnectionPool(host='files.pythonhosted.org', port=443)”
    Fiddler弱网测试
    Fiddler断点应用
    Fiddler基本介绍
    Fiddler安装及证书配置教程(Windows)
    URL统一资源定位符
  • 原文地址:https://www.cnblogs.com/xuange1/p/10994422.html
Copyright © 2011-2022 走看看