zoukankan      html  css  js  c++  java
  • 单词统计

    这次的实验测试分为很多个小部分,由于个人能力有限,我只完成了前三个部分(第三部分是自行确定输出前多少个最常出现的单词)。其中第一部分是统计文本中26个英文字母出现的次数与比例,并降序排序:

    package piao;
    
    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.text.NumberFormat;
    
    /**
     * Counts how often each of the 26 English letters occurs in a text file
     * (upper and lower case are counted together) and prints the letters in
     * descending order of frequency with their share of all counted letters.
     */
    public class text0{
        /** File analysed when no path is given on the command line. */
        private static final String DEFAULT_PATH = "D:\\java/eclipse/测试/piao.txt";

        /** Number of letters in the English alphabet. */
        private static final int ALPHABET_SIZE = 26;

        /**
         * Reads the input file one character at a time, tallies the ASCII
         * letters and prints the resulting statistics.
         *
         * @param args optional; {@code args[0]} overrides the default input path
         * @throws Exception if the file cannot be opened or read
         */
        public static void main(String[] args) throws Exception {
            String path = args.length > 0 ? args[0] : DEFAULT_PATH;
            int[] count = new int[ALPHABET_SIZE];
            // try-with-resources closes the reader even when read() fails.
            try (BufferedReader br = new BufferedReader(new FileReader(path))) {
                int ch;
                while ((ch = br.read()) != -1) {
                    if (ch >= 'A' && ch <= 'Z') {
                        count[ch - 'A']++;
                    } else if (ch >= 'a' && ch <= 'z') {
                        count[ch - 'a']++;
                    }
                }
            }
            // The counts are passed unsorted: sorting the array itself would
            // destroy the position-to-letter mapping that Print relies on.
            Print(count);
        }

        /**
         * Sorts the given values in place into descending order.
         *
         * <p>Only the values are moved, so any meaning attached to the original
         * positions (such as "index 0 is the letter a") is lost afterwards.
         *
         * @param count the values to sort; modified in place
         * @return the same array instance, now in descending order
         */
        public static int[] Paixu(int[] count) {
            int size = count.length;
            for (int i = 0; i < size - 1; i++) {
                for (int j = i + 1; j < size; j++) {
                    if (count[i] < count[j]) {
                        int temp = count[j];
                        count[j] = count[i];
                        count[i] = temp;
                    }
                }
            }
            return count;
        }

        /**
         * Prints one line per letter that occurred at least once, most frequent
         * first, in the form {@code letter(count)(percent%)}.
         *
         * <p>{@code count[i]} is taken as the number of occurrences of the letter
         * {@code 'a' + i}. The array does not need to be sorted; letters with
         * equal counts are printed in alphabetical order. Percentages are
         * relative to the sum of all entries and show at most two decimals.
         *
         * @param count occurrences per letter, indexed from {@code 'a'}
         */
        public static void Print(int[] count) {
            NumberFormat numberFormat = NumberFormat.getInstance();
            // Show at most two digits after the decimal point.
            numberFormat.setMaximumFractionDigits(2);
            int sum = 0;
            for (int i = 0; i < count.length; i++) {
                sum = count[i] + sum;
            }
            int letters = Math.min(count.length, ALPHABET_SIZE);
            int[] order = indexesByCountDescending(count, letters);
            for (int k = 0; k < order.length; k++) {
                int index = order[k];
                if (count[index] > 0) {
                    char lowerCase = (char) ('a' + index);
                    String percent = numberFormat.format((float) count[index] / (float) sum * 100);
                    System.out.println(lowerCase + "(" + count[index] + ")" + "(" + percent + "%)");
                }
            }
        }

        /**
         * Returns the indexes {@code 0..limit-1} ordered by descending count
         * (stable insertion sort, so equal counts keep ascending index order).
         */
        private static int[] indexesByCountDescending(int[] count, int limit) {
            int[] order = new int[limit];
            for (int i = 0; i < limit; i++) {
                int j = i;
                while (j > 0 && count[order[j - 1]] < count[i]) {
                    order[j] = order[j - 1];
                    j--;
                }
                order[j] = i;
            }
            return order;
        }
    }

    第二部分是统计所有单词出现的次数并降序排序:

    package piao;
    
    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Comparator;
    import java.util.List;
    import java.util.Map;
    import java.util.TreeMap;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Counts how often every word (a run of ASCII letters, case-sensitive)
     * occurs in a text file and prints all words from most to least frequent.
     */
    public class text1 {
        /** File analysed when no path is given on the command line. */
        private static final String DEFAULT_PATH = "D:\\java/eclipse/测试/piao.txt";

        /** A word is a maximal run of ASCII letters. */
        private static final Pattern WORD = Pattern.compile("[a-zA-Z]+");

        /**
         * Prints every distinct word of the input file as {@code word :count},
         * highest count first.
         *
         * @param args optional; {@code args[0]} overrides the default input path
         * @throws Exception if the file cannot be opened or read
         */
        public static void main(String[] args) throws Exception {
            String path = args.length > 0 ? args[0] : DEFAULT_PATH;
            List<Map.Entry<String, Integer>> list = sortByCountAscending(countWords(path));
            // The list is ascending, so walk it backwards; index 0 is included
            // so that the least frequent word is printed as well.
            for (int i = list.size() - 1; i >= 0; i--) {
                String key = list.get(i).getKey();
                Integer value = list.get(i).getValue();
                System.out.println(key + " :" + value);
            }
        }

        /**
         * Reads the file line by line and tallies each word. Matching per line
         * keeps the last word of one line from merging with the first word of
         * the next line.
         */
        private static Map<String, Integer> countWords(String path) throws Exception {
            Map<String, Integer> map = new TreeMap<String, Integer>();
            // try-with-resources closes the reader even when readLine() fails.
            try (BufferedReader re = new BufferedReader(new FileReader(path))) {
                String line;
                while ((line = re.readLine()) != null) {
                    Matcher matcher = WORD.matcher(line);
                    while (matcher.find()) {
                        String word = matcher.group();
                        Integer times = map.get(word);
                        // First occurrence starts at 1, otherwise increment.
                        map.put(word, times == null ? 1 : times + 1);
                    }
                }
            }
            return map;
        }

        /** Returns the map's entries as a list sorted by ascending count. */
        private static List<Map.Entry<String, Integer>> sortByCountAscending(Map<String, Integer> map) {
            List<Map.Entry<String, Integer>> list =
                    new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
            Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
                @Override
                public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                    return left.getValue().compareTo(right.getValue());
                }
            });
            return list;
        }
    }

    第三部分是自行确定输出前多少个最常出现的单词:

    package piao;
    
    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Comparator;
    import java.util.List;
    import java.util.Map;
    import java.util.Scanner;
    import java.util.TreeMap;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Counts how often every word (a run of ASCII letters, case-sensitive)
     * occurs in a text file and prints the {@code n} most frequent words,
     * where {@code n} is read from standard input.
     */
    public class text2 {
        /** File analysed when no path is given on the command line. */
        private static final String DEFAULT_PATH = "D:\\java/eclipse/测试/piao.txt";

        /** A word is a maximal run of ASCII letters. */
        private static final Pattern WORD = Pattern.compile("[a-zA-Z]+");

        /**
         * Counts the words of the input file, asks for {@code n} on standard
         * input and prints the {@code n} most frequent words as
         * {@code word :count}, highest count first. If {@code n} exceeds the
         * number of distinct words, all words are printed; a non-positive
         * {@code n} prints nothing.
         *
         * @param args optional; {@code args[0]} overrides the default input path
         * @throws Exception if the file cannot be opened or read
         */
        public static void main(String[] args) throws Exception {
            String path = args.length > 0 ? args[0] : DEFAULT_PATH;
            List<Map.Entry<String, Integer>> list = sortByCountAscending(countWords(path));
            // Deliberately not closed: closing the scanner would close System.in.
            @SuppressWarnings("resource")
            Scanner in = new Scanner(System.in);
            System.out.println("输入前n个最常出现的单词:");
            int n = in.nextInt();
            // Clamp the request so the index below can never leave the list.
            int shown = Math.max(0, Math.min(n, list.size()));
            int last = list.size() - 1;
            // The list is ascending, so the most frequent words sit at the end.
            for (int i = last; i > last - shown; i--) {
                String key = list.get(i).getKey();
                Integer value = list.get(i).getValue();
                System.out.println(key + " :" + value);
            }
        }

        /**
         * Reads the file line by line and tallies each word. Matching per line
         * keeps the last word of one line from merging with the first word of
         * the next line.
         */
        private static Map<String, Integer> countWords(String path) throws Exception {
            Map<String, Integer> map = new TreeMap<String, Integer>();
            // try-with-resources closes the reader even when readLine() fails.
            try (BufferedReader re = new BufferedReader(new FileReader(path))) {
                String line;
                while ((line = re.readLine()) != null) {
                    Matcher matcher = WORD.matcher(line);
                    while (matcher.find()) {
                        String word = matcher.group();
                        Integer times = map.get(word);
                        // First occurrence starts at 1, otherwise increment.
                        map.put(word, times == null ? 1 : times + 1);
                    }
                }
            }
            return map;
        }

        /** Returns the map's entries as a list sorted by ascending count. */
        private static List<Map.Entry<String, Integer>> sortByCountAscending(Map<String, Integer> map) {
            List<Map.Entry<String, Integer>> list =
                    new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
            Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
                @Override
                public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                    return left.getValue().compareTo(right.getValue());
                }
            });
            return list;
        }
    }
  • 相关阅读:
    LeetCode——Generate Parentheses
    LeetCode——Best Time to Buy and Sell Stock IV
    LeetCode——Best Time to Buy and Sell Stock III
    LeetCode——Best Time to Buy and Sell Stock
    LeetCode——Find Minimum in Rotated Sorted Array
    Mahout实现基于用户的协同过滤算法
    使用Java对文件进行解压缩
    LeetCode——Convert Sorted Array to Binary Search Tree
    LeetCode——Missing Number
    LeetCode——Integer to Roman
  • 原文地址:https://www.cnblogs.com/yuanxiaochou/p/11065633.html
Copyright © 2011-2022 走看看