zoukankan      html  css  js  c++  java
  • 单词统计

    这次的实验测试分为很多个小部分,由于个人能力有限,我只完成了前三个部分。其中第一部分是统计文本中26个英文字母出现的次数与比例,并按降序排序(第三部分则是自行确定输出前多少个最常出现的单词):

    package piao;
    
    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.text.NumberFormat;
    
    /**
     * Counts how often each of the 26 English letters occurs in a text file
     * (upper and lower case are counted together) and prints the letters in
     * descending order of frequency with their share of all letters.
     */
    public class text0{
        /** File that is analysed when no path is given on the command line. */
        private static final String DEFAULT_PATH = "D:\\java/eclipse/测试/piao.txt";

        /**
         * Reads the input file character by character, tallies the letters and
         * prints the result via {@link #Print(int[])}.
         *
         * @param args optional; {@code args[0]} overrides the default input path
         * @throws Exception if the file cannot be opened or read
         */
        public static void main(String[] args) throws Exception {
            String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
            int[] count = new int[26];
            BufferedReader br = new BufferedReader(new FileReader(path));
            try {
                int ch;
                while ((ch = br.read()) != -1) {
                    if (ch >= 'A' && ch <= 'Z') {
                        count[ch - 'A']++;
                    } else if (ch >= 'a' && ch <= 'z') {
                        count[ch - 'a']++;
                    }
                }
            } finally {
                // Close the reader even when reading fails.
                br.close();
            }
            // The counts are passed unsorted on purpose: index i must keep meaning
            // "letter 'a' + i" so that Print can label every count correctly.
            Print(count);
        }

        /**
         * Sorts the given array in place into descending order.
         *
         * @param count values to sort; modified in place
         * @return the same array instance, now sorted from largest to smallest
         */
        public static int[] Paixu(int[] count) {
            int temp;
            int size=count.length;
            for(int i=0;i<size-1;i++) {
                for(int j=i+1;j<size;j++) {
                    if(count[i]<count[j]){
                        temp=count[j];
                        count[j]=count[i];
                        count[i]=temp;
                    }
                }
            }
            return count;
        }

        /**
         * Prints one line per letter that occurred at least once, most frequent
         * first, in the form {@code letter(count)(percent%)}.
         *
         * <p>Index {@code i} of {@code count} is interpreted as the letter
         * {@code 'a' + i}. Letters with equal counts are printed in alphabetical
         * order. The array itself is not modified.
         *
         * @param count occurrences per letter, indexed by letter (0 = 'a')
         */
        public static void Print(int[] count) {
            NumberFormat numberFormat = NumberFormat.getInstance();
            // Show at most two digits after the decimal point.
            numberFormat.setMaximumFractionDigits(2);
            int sum = 0;
            for (int i = 0; i < count.length; i++) {
                sum += count[i];
            }
            int[] order = indicesByCountDescending(count);
            for (int k = 0; k < order.length; k++) {
                int letter = order[k];
                if (count[letter] > 0) {
                    String percent = numberFormat.format((float) count[letter] / (float) sum * 100);
                    char lowerCase = (char) ('a' + letter);
                    System.out.println(lowerCase+"("+count[letter]+")"+"("+percent+"%)");
                }
            }
        }

        /**
         * Returns the indices of {@code count} ordered by descending value.
         * Uses a stable insertion sort so that equal values keep index order.
         */
        private static int[] indicesByCountDescending(int[] count) {
            int[] order = new int[count.length];
            for (int i = 0; i < order.length; i++) {
                order[i] = i;
            }
            for (int i = 1; i < order.length; i++) {
                int current = order[i];
                int j = i - 1;
                // Shift only strictly smaller entries to keep the sort stable.
                while (j >= 0 && count[order[j]] < count[current]) {
                    order[j + 1] = order[j];
                    j--;
                }
                order[j + 1] = current;
            }
            return order;
        }
    }

    第二部分是统计所有单词出现的次数并降序排序:

    package piao;
    
    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Comparator;
    import java.util.List;
    import java.util.Map;
    import java.util.TreeMap;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Counts how often every word (a run of ASCII letters) occurs in a text file
     * and prints all words in descending order of frequency.
     */
    public class text1 {
        /** File that is analysed when no path is given on the command line. */
        private static final String DEFAULT_PATH = "D:\\java/eclipse/测试/piao.txt";

        /** A word is a maximal run of ASCII letters; matching is case-sensitive. */
        private static final Pattern WORD_PATTERN = Pattern.compile("[a-zA-Z]+");

        /**
         * Reads the input file, counts the words and prints one line per word in
         * the form {@code word :count}, most frequent first. Words with the same
         * count are printed in reverse alphabetical order.
         *
         * @param args optional; {@code args[0]} overrides the default input path
         * @throws Exception if the file cannot be opened or read
         */
        public static void main(String[] args) throws Exception {
            String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
            Map<String, Integer> map = new TreeMap<String, Integer>();
            BufferedReader re = new BufferedReader(new FileReader(path));
            try {
                String line;
                while ((line = re.readLine()) != null) {
                    // Match line by line: readLine() strips the line terminator, so
                    // joining lines first would fuse the last word of one line with
                    // the first word of the next.
                    Matcher matcher = WORD_PATTERN.matcher(line);
                    while (matcher.find()) {
                        String word = matcher.group();
                        Integer previous = map.get(word);
                        map.put(word, previous == null ? 1 : previous + 1);
                    }
                }
            } finally {
                // Close the reader even when reading fails.
                re.close();
            }
            List<Map.Entry<String, Integer>> list =
                    new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
            // Sort ascending by count; the list is then walked backwards.
            Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
                @Override
                public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                    return (left.getValue()).compareTo(right.getValue());
                }
            });
            // i >= 0 so that the least frequent word (index 0) is printed as well.
            for (int i = list.size() - 1; i >= 0; i--) {
                String key = list.get(i).getKey();
                Integer value = list.get(i).getValue();
                System.out.println(key + " :" + value);
            }
        }
    }

    第三部分是自行确定输出前多少个最常出现的单词:

    package piao;
    
    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Comparator;
    import java.util.List;
    import java.util.Map;
    import java.util.Scanner;
    import java.util.TreeMap;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Counts how often every word (a run of ASCII letters) occurs in a text file
     * and prints the {@code n} most frequent words, where {@code n} is read from
     * standard input.
     */
    public class text2 {
        /** File that is analysed when no path is given on the command line. */
        private static final String DEFAULT_PATH = "D:\\java/eclipse/测试/piao.txt";

        /** A word is a maximal run of ASCII letters; matching is case-sensitive. */
        private static final Pattern WORD_PATTERN = Pattern.compile("[a-zA-Z]+");

        /**
         * Reads the input file, counts the words, asks for {@code n} on standard
         * input and prints the {@code n} most frequent words in the form
         * {@code word :count}, most frequent first. If {@code n} is larger than
         * the number of distinct words, all words are printed; if it is zero or
         * negative, none are.
         *
         * @param args optional; {@code args[0]} overrides the default input path
         * @throws Exception if the file cannot be opened or read, or if the value
         *         entered for {@code n} is not an integer
         */
        public static void main(String[] args) throws Exception {
            String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
            Map<String, Integer> map = new TreeMap<String, Integer>();
            BufferedReader re = new BufferedReader(new FileReader(path));
            try {
                String line;
                while ((line = re.readLine()) != null) {
                    // Match line by line: readLine() strips the line terminator, so
                    // joining lines first would fuse the last word of one line with
                    // the first word of the next.
                    Matcher matcher = WORD_PATTERN.matcher(line);
                    while (matcher.find()) {
                        String word = matcher.group();
                        Integer previous = map.get(word);
                        map.put(word, previous == null ? 1 : previous + 1);
                    }
                }
            } finally {
                // Close the reader even when reading fails.
                re.close();
            }
            List<Map.Entry<String, Integer>> list =
                    new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
            // Sort ascending by count; the list is then walked backwards.
            Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
                @Override
                public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                    return (left.getValue()).compareTo(right.getValue());
                }
            });
            // The scanner wraps System.in and is deliberately left open.
            @SuppressWarnings("resource")
            Scanner in=new Scanner(System.in);
            System.out.println("输入前n个最常出现的单词:");
            int n=in.nextInt();
            // Clamp n so that asking for more words than exist cannot index below 0.
            int limit = Math.min(Math.max(n, 0), list.size());
            int last = list.size() - 1;
            for (int k = 0; k < limit; k++) {
                Map.Entry<String, Integer> entry = list.get(last - k);
                System.out.println(entry.getKey() + " :" + entry.getValue());
            }
        }
    }
  • 相关阅读:
    java 中 this 和 super 说明及在构造器中super()和this()相互调用执行顺序
    Java中get()方法和set()方法如何使用?
    java中如何在键盘中输入一串数字然后存入数组中?
    修改阿里云服务器主机名称
    flask第一个页面
    爬取汽车之家新闻的数据
    冒泡排序
    爬取简书
    Mac Flask-Migrate 安装出现错误
    C++__ 判断和循环
  • 原文地址:https://www.cnblogs.com/yuanxiaochou/p/11065633.html
Copyright © 2011-2022 走看看