zoukankan      html  css  js  c++  java
  • 单词统计

    一、读取文本中英文字母出现的次数并降序输出英文字母的百分比

    源码:

    复制代码
    package total;
    
    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.io.IOException;
    import java.util.Arrays;
    
    /**
     * Counts how often each English letter occurs in the file {@code a.txt} and
     * prints every letter with its share of all letters, most frequent first.
     *
     * <p>Upper- and lower-case letters are counted separately, so the report always
     * has 52 rows followed by one row with the total number of letters.
     */
    public class Statistics_letter {

        /** Number of letters in one case of the English alphabet. */
        private static final int ALPHABET_SIZE = 26;

        /** Number of counters: slots 0-25 hold 'A'-'Z', slots 26-51 hold 'a'-'z'. */
        private static final int SLOT_COUNT = 2 * ALPHABET_SIZE;

        /**
         * Reads {@code a.txt} from the working directory and prints the letter report.
         *
         * @param args unused
         * @throws IOException if the file cannot be opened or read
         */
        public static void main(String[] args) throws IOException {
            int[] counts;
            // try-with-resources closes the reader even when reading fails.
            try (BufferedReader reader = new BufferedReader(new FileReader("a.txt"))) {
                counts = countLetters(reader);
            }

            int total = 0;
            for (int slotCount : counts) {
                total += slotCount;
            }

            for (int slot : rankByCountDescending(counts)) {
                // A file without any letter would otherwise divide by zero and print NaN.
                double percentage = total == 0 ? 0.0 : ((double) counts[slot] / total) * 100;
                System.out.println(letterAt(slot) + ":" + String.format("%.2f", percentage) + '%');
            }
            System.out.println("英文字母总数为:" + total);
        }

        /** Tallies every ASCII letter delivered by {@code reader} into its slot. */
        private static int[] countLetters(BufferedReader reader) throws IOException {
            int[] counts = new int[SLOT_COUNT];
            int next;
            while ((next = reader.read()) != -1) {
                if (next >= 'A' && next <= 'Z') {
                    counts[next - 'A']++;
                } else if (next >= 'a' && next <= 'z') {
                    counts[ALPHABET_SIZE + next - 'a']++;
                }
            }
            return counts;
        }

        /**
         * Returns the slot indexes ordered by descending count.
         *
         * <p>Sorting indexes (instead of sorting the values and searching for them
         * afterwards) guarantees that every letter appears exactly once, even when
         * several letters share the same count. The insertion sort is stable, so
         * tied letters keep the order A-Z, a-z.
         */
        private static int[] rankByCountDescending(int[] counts) {
            int[] order = new int[counts.length];
            for (int slot = 0; slot < order.length; slot++) {
                int position = slot;
                // Strict comparison: equal counts never overtake an earlier slot.
                while (position > 0 && counts[order[position - 1]] < counts[slot]) {
                    order[position] = order[position - 1];
                    position--;
                }
                order[position] = slot;
            }
            return order;
        }

        /** Maps a slot index back to the letter it counts. */
        private static char letterAt(int slot) {
            if (slot >= ALPHABET_SIZE) {
                return (char) ('a' + slot - ALPHABET_SIZE);
            }
            return (char) ('A' + slot);
        }
    }
    复制代码

    运行结果截图:

     

    二、读取文本中的英文单词并按出现次数降序输出结果

    源码:

    复制代码
    package total;
    
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.io.OutputStreamWriter;
    import java.util.HashMap;
    import java.util.Map;
    
    
    /**
     * Counts the English words in a UTF-8 text file and reports the most frequent
     * ones on standard output and in the file {@code StatisticsWord.txt}.
     *
     * <p>A word is a maximal run of ASCII letters; words are case-sensitive.
     */
    public class Statistics_words {
        /** Input file read when no path is supplied. */
        private static final String DEFAULT_INPUT_PATH = "C:\\Users\\22400\\Desktop\\a.txt";

        /** Maximum number of words listed in the report. */
        private static final int MAXNUM = 20;

        /** Occurrence count of every word collected by {@code textImport}. */
        public Map<String, Integer> map1 = new HashMap<String, Integer>();

        /**
         * Counts the words of the input file, prints the most frequent ones and
         * writes the same report to {@code StatisticsWord.txt}.
         *
         * @param arg optional; when present, {@code arg[0]} is the path of the file to
         *            read instead of the built-in default path
         * @throws IOException if the input cannot be read or the report cannot be written
         */
        public static void main(String arg[]) throws IOException {
            Statistics_words counter = new Statistics_words();
            if (arg != null && arg.length > 0) {
                counter.textImport(arg[0]);
            } else {
                counter.textImport();
            }
            System.out.println("文本出现单词的次数情况为:");

            // Rank on a copy so the collected counts in map1 stay intact.
            Map<String, Integer> remaining = new HashMap<String, Integer>(counter.map1);
            StringBuilder report = new StringBuilder();
            for (int rank = 1; rank <= MAXNUM && !remaining.isEmpty(); rank++) {
                String word = mostFrequent(remaining);
                String line = "出现次数第" + rank + "多的单词为:" + word + "\t\t\t出现次数: " + remaining.get(word);
                System.out.println(line);
                report.append(line).append("\n");
                remaining.remove(word);
            }
            counter.textExport(report.toString());
        }

        /**
         * Returns the key with the highest count in a non-empty map. Equal counts are
         * resolved in favour of the alphabetically smaller word so that the report
         * does not depend on hash iteration order.
         */
        private static String mostFrequent(Map<String, Integer> counts) {
            String best = null;
            int bestCount = 0;
            for (Map.Entry<String, Integer> entry : counts.entrySet()) {
                int count = entry.getValue();
                if (best == null || count > bestCount
                        || (count == bestCount && entry.getKey().compareTo(best) < 0)) {
                    best = entry.getKey();
                    bestCount = count;
                }
            }
            return best;
        }

        /**
         * Adds the words of the default input file to {@link #map1}.
         *
         * @throws IOException if the file cannot be opened or read
         */
        public void textImport() throws IOException {
            textImport(DEFAULT_INPUT_PATH);
        }

        /**
         * Adds the words of the given UTF-8 file to {@link #map1}. Counts accumulate
         * across calls.
         *
         * @param path path of the text file to read
         * @throws IOException if the file cannot be opened or read
         */
        public void textImport(String path) throws IOException {
            try (FileInputStream in = new FileInputStream(new File(path));
                    InputStreamReader reader = new InputStreamReader(in, "UTF-8")) {
                StringBuilder word = new StringBuilder();
                int next;
                // read() returning -1 is the end-of-stream signal; ready() only says
                // whether a read would block and must not be used for that purpose.
                while ((next = reader.read()) != -1) {
                    char ch = (char) next;
                    if (isWord(ch)) {
                        word.append(ch);
                    } else {
                        recordWord(word);
                    }
                }
                recordWord(word);
            }
        }

        /**
         * Counts the pending word, if any, and clears the buffer. Empty buffers are
         * skipped so that consecutive separators do not register an empty "word".
         */
        private void recordWord(StringBuilder word) {
            if (word.length() == 0) {
                return;
            }
            String key = word.toString();
            Integer previous = map1.get(key);
            map1.put(key, previous == null ? 1 : previous + 1);
            word.setLength(0);
        }

        /**
         * Writes {@code txt} to {@code StatisticsWord.txt} in UTF-8, replacing any
         * previous content.
         *
         * @param txt text to store
         * @throws IOException if the file cannot be written
         */
        public void textExport(String txt) throws IOException {
            try (FileOutputStream out = new FileOutputStream(new File("StatisticsWord.txt"));
                    OutputStreamWriter writer = new OutputStreamWriter(out, "UTF-8")) {
                writer.append(txt);
            }
        }

        /**
         * Tells whether {@code a} is an ASCII letter.
         *
         * @param a character to test
         * @return {@code true} for 'a'-'z' and 'A'-'Z', {@code false} otherwise
         */
        public boolean isWord(char a) {
            return (a >= 'a' && a <= 'z') || (a >= 'A' && a <= 'Z');
        }

    }
    复制代码

  • 相关阅读:
    android存储訪问框架Storage Access Framework
    hdu 5338 ZZX and Permutations (贪心+线段树+二分)
    集成CCFlow工作流与GPM的办公系统驰骋CCOA介绍(三)
    PHP中文分词扩展 SCWS
    使用docker 搭建基础的 mysql 应用
    UVA 11090 Going in Cycle!!(Bellman-Ford推断负圈)
    HDU 5237 Base64
    Android 自己定义主菜单
    HDU 1018 Big Number 数学题解
    python经常使用的十进制、16进制、字符串、字节串之间的转换(长期更新帖)
  • 原文地址:https://www.cnblogs.com/wwbzuiku/p/13089944.html
Copyright © 2011-2022 走看看