这次的实验测试分为很多个小部分,由于个人能力有限,我只完成了前三个部分。其中第一部分是统计文本中26个英文字母出现的次数与比例,并按降序排序:
package piao; import java.io.BufferedReader; import java.io.FileReader; import java.text.NumberFormat; public class text0{ public static void main(String[] args) throws Exception { BufferedReader br = new BufferedReader(new FileReader("D:\java/eclipse/测试/piao.txt")); int[] count = new int[26]; char[] c = new char[1]; int len = br.read(c); while(len!=-1) { if(c[0]>='A'&&c[0]<='Z') { int number = c[0]; count[number-65]++; } if(c[0]>='a'&&c[0]<='z') { int number = c[0]; count[number-97]++; } len = br.read(c); } count=Paixu(count); Print(count); br.close(); } public static int[] Paixu(int[] count) { int temp; int size=count.length; for(int i=0;i<size-1;i++) { for(int j=i+1;j<size;j++) { if(count[i]<count[j]){ temp=count[j]; count[j]=count[i]; count[i]=temp; } } } return count; } public static void Print(int[] count) { NumberFormat numberFormat = NumberFormat.getInstance(); // 设置精确到小数点后2位 numberFormat.setMaximumFractionDigits(2); int sum=0; for(int i=0;i<count.length;i++) { sum=count[i]+sum; } String[] a=new String[count.length]; for(int i=0;i<count.length;i++) { a[i] = numberFormat.format((float) count[i] / (float) sum * 100); } for(int i=0;i<26;i++) { if(count[i]>0) { char lowerCase = (char)(i+97); System.out.println(lowerCase+"("+count[i]+")"+"("+a[i]+"%)"); } } } }
第二部分是统计所有单词出现的次数并降序排序:
package piao; import java.io.BufferedReader; import java.io.FileReader; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.TreeMap; import java.util.regex.Matcher; import java.util.regex.Pattern; public class text1 { public static void main(String[] args) throws Exception { BufferedReader re = new BufferedReader(new FileReader("D:\java/eclipse/测试/piao.txt")); StringBuffer buffer = new StringBuffer(); String line = null; while ((line = re.readLine()) != null) { buffer.append(line); } re.close(); Pattern expression = Pattern.compile("[a-zA-Z]+");// 定义正则表达式匹配单词 String string = buffer.toString(); Matcher matcher = expression.matcher(string); Map<String, Integer> map = new TreeMap<String, Integer>(); String word = ""; int times = 0; while (matcher.find()) {// 是否匹配单词 word = matcher.group();// 得到一个单词-树映射的键 if (map.containsKey(word)) {// 如果包含该键,单词出现过 times = map.get(word);// 得到单词出现的次数 map.put(word, times + 1); } else { map.put(word, 1);// 否则单词第一次出现,添加到映射中 } } List<Map.Entry<String, Integer>>list = new ArrayList<Map.Entry<String,Integer>>(map.entrySet()); Collections.sort(list, new Comparator<Map.Entry<String, Integer>>(){// 排序,打印 public int compare(Map.Entry<String, Integer> left,Map.Entry<String, Integer> right) { return (left.getValue()).compareTo(right.getValue()); } }); int last = list.size() - 1; for (int i = last; i > 0; i--) { String key = list.get(i).getKey(); Integer value = list.get(i).getValue(); System.out.println(key + " :" + value); } } }
第三部分是由用户自行确定输出前多少个出现次数最多的单词:
package piao; import java.io.BufferedReader; import java.io.FileReader; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Scanner; import java.util.TreeMap; import java.util.regex.Matcher; import java.util.regex.Pattern; public class text2 { public static void main(String[] args) throws Exception { BufferedReader re = new BufferedReader(new FileReader("D:\java/eclipse/测试/piao.txt")); StringBuffer buffer = new StringBuffer(); String line = null; while ((line = re.readLine()) != null) { buffer.append(line); } re.close(); Pattern expression = Pattern.compile("[a-zA-Z]+");// 定义正则表达式匹配单词 String string = buffer.toString(); Matcher matcher = expression.matcher(string); Map<String, Integer> map = new TreeMap<String, Integer>(); String word = ""; int times = 0; while (matcher.find()) {// 是否匹配单词 word = matcher.group();// 得到一个单词-树映射的键 if (map.containsKey(word)) {// 如果包含该键,单词出现过 times = map.get(word);// 得到单词出现的次数 map.put(word, times + 1); } else { map.put(word, 1);// 否则单词第一次出现,添加到映射中 } } List<Map.Entry<String, Integer>>list = new ArrayList<Map.Entry<String,Integer>>(map.entrySet()); Collections.sort(list, new Comparator<Map.Entry<String, Integer>>(){// 排序,打印 public int compare(Map.Entry<String, Integer> left,Map.Entry<String, Integer> right) { return (left.getValue()).compareTo(right.getValue()); } }); @SuppressWarnings("resource") Scanner in=new Scanner(System.in); System.out.println("输入前n个最常出现的单词:"); int n=in.nextInt(); int last = list.size() - 1; for (int i = last; i > last - n; i--) { String key = list.get(i).getKey(); Integer value = list.get(i).getValue(); System.out.println(key + " :" + value); } } }