1.一个用于统计文本文件中的英语单词出现频率的控制台程序
package com.word;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.text.NumberFormat;
import java.util.Arrays;
import java.util.Comparator;

/**
 * Console program that reports how often each English letter occurs in a text file.
 *
 * <p>Upper- and lower-case letters are counted together. Each letter that occurs at least once
 * is printed as {@code letter(percentage%)}, most frequent letter first.
 */
public class test {

    /** Number of letters in the English alphabet; also the size of the count table. */
    private static final int ALPHABET_SIZE = 26;

    /** File analysed when no path is supplied on the command line. */
    private static final String DEFAULT_PATH =
            "E:\\qq receive files\\Harry Potter and the Sorcerer's Stone.txt";

    /**
     * Empty placeholder type, kept only so that the class keeps its original members.
     *
     * <p>Inside {@code test} the simple name {@code java} resolves to this type, so JDK classes
     * have to be imported instead of being written with a {@code java.} prefix.
     */
    public class java {
    }

    /**
     * Counts the letters of a text file and prints their relative frequencies.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyse. Without it the
     *             built-in default path is used.
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;

        int[] count;
        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader bf =
                new BufferedReader(new InputStreamReader(new FileInputStream(path)))) {
            count = countLetters(bf);
        }

        // The counts are NOT sorted here: Print needs index i to still mean letter 'a' + i.
        Print(count);
    }

    /** Reads {@code reader} to the end and tallies A-Z / a-z case-insensitively by letter index. */
    private static int[] countLetters(Reader reader) throws IOException {
        int[] count = new int[ALPHABET_SIZE];
        int ch;
        while ((ch = reader.read()) != -1) {
            if (ch >= 'A' && ch <= 'Z') {
                count[ch - 'A']++;
            } else if (ch >= 'a' && ch <= 'z') {
                count[ch - 'a']++;
            }
        }
        return count;
    }

    /**
     * Sorts the given values in place into descending order.
     *
     * <p>Sorting a letter-count table with this method discards the information about which
     * count belongs to which letter, so do not call it before {@link #Print(int[])}.
     *
     * @param count values to sort; modified in place
     * @return the same array instance, now in descending order
     */
    public static int[] Sort(int[] count) {
        Arrays.sort(count);
        // Arrays.sort is ascending; reverse to obtain the descending order callers expect.
        for (int lo = 0, hi = count.length - 1; lo < hi; lo++, hi--) {
            int tmp = count[lo];
            count[lo] = count[hi];
            count[hi] = tmp;
        }
        return count;
    }

    /**
     * Prints every letter that occurs at least once together with its share of all counted
     * letters, formatted with at most two fraction digits, most frequent letter first. Letters
     * with equal counts are printed in alphabetical order.
     *
     * @param count occurrences per letter, where {@code count[i]} belongs to the letter
     *              {@code 'a' + i}; entries beyond index 25 are ignored
     */
    public static void Print(int[] count) {
        final int[] counts = count;
        final int letters = Math.min(counts.length, ALPHABET_SIZE);

        long sum = 0;
        Integer[] order = new Integer[letters];
        for (int i = 0; i < letters; i++) {
            sum += counts[i];
            order[i] = i;
        }

        // Sort the letter indices rather than the counts so each count keeps its letter.
        // The object sort is stable, which keeps ties in alphabetical order.
        Arrays.sort(order, new Comparator<Integer>() {
            @Override
            public int compare(Integer left, Integer right) {
                return Integer.compare(counts[right], counts[left]);
            }
        });

        NumberFormat df = NumberFormat.getInstance();
        df.setMaximumFractionDigits(2);

        for (int index : order) {
            if (counts[index] > 0) {
                char lowerCase = (char) ('a' + index);
                String percent = df.format(100.0 * counts[index] / sum);
                System.out.println(lowerCase + "(" + percent + "%)");
            }
        }
    }
}
2.要求:输出单个文件中的前 N 个最常出现的英语单词。
功能1:输出文件中所有不重复的单词,按照出现次数由多到少排列,出现次数同样多的,以字典序排列。
功能2:指定文件目录,对目录下每一个文件执行 功能1的操作。
功能3:指定文件目录,但是会递归遍历目录下的所有子目录,对每个文件执行功能1的操作。
只完成了输出文件中的前N个单词的功能....
package com.word;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import java.util.StringTokenizer;

/**
 * Console program that prints the N most frequent words of a text file.
 *
 * <p>Words are compared case-insensitively. The result is ordered by descending number of
 * occurrences; words that occur equally often are ordered alphabetically.
 */
public class Test2 {

    /** File analysed when no path is supplied on the command line. */
    private static final String DEFAULT_PATH =
            "//E:\\\\qq receive files\\\\Harry Potter and the Sorcerer's Stone.txt";

    /** Characters that separate words; they never become part of a word. */
    private static final String DELIMITERS = " .,?”“;:'' !—‘";

    /**
     * Counts the words of a text file, asks on standard input how many words to show, and
     * prints that many of the most frequent words as {@code word count} lines.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyse. Without it the
     *             built-in default path is used.
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // Not closed on purpose: closing the Scanner would also close System.in.
        Scanner sc = new Scanner(System.in);
        File file = new File((args != null && args.length > 0) ? args[0] : DEFAULT_PATH);

        Map<String, Integer> counts;
        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            counts = countWords(reader);
        }
        List<Map.Entry<String, Integer>> ranked = rankByFrequency(counts);

        System.out.print("你要前几个最常出现的单词:");
        int choose = sc.nextInt();

        // Never print more entries than there are distinct words.
        int limit = Math.min(choose, ranked.size());
        for (int i = 0; i < limit; i++) {
            Map.Entry<String, Integer> entry = ranked.get(i);
            System.out.println(entry.getKey() + " " + entry.getValue());
        }
    }

    /** Reads all lines from {@code reader} and tallies each lower-cased word. */
    private static Map<String, Integer> countWords(BufferedReader reader) throws IOException {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        String line;
        while ((line = reader.readLine()) != null) {
            StringTokenizer tokens = new StringTokenizer(line, DELIMITERS);
            while (tokens.hasMoreTokens()) {
                // Locale.ROOT keeps lower-casing independent of the machine's default locale.
                String word = tokens.nextToken().toLowerCase(Locale.ROOT);
                Integer seen = counts.get(word);
                counts.put(word, seen == null ? 1 : seen + 1);
            }
        }
        return counts;
    }

    /** Returns the entries ordered by descending count, ties broken by ascending word. */
    private static List<Map.Entry<String, Integer>> rankByFrequency(Map<String, Integer> counts) {
        List<Map.Entry<String, Integer>> ranked =
                new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
        Collections.sort(ranked, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                int byCount = Integer.compare(right.getValue(), left.getValue());
                return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
            }
        });
        return ranked;
    }
}