获取文章中各个字母个数的代码
package text1;

import java.io.*;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
 * Counts how often each ASCII letter occurs in a text file and prints, for
 * every letter, its count and its percentage of all letters found.
 *
 * <p>Counts live in a 52-slot array: indices 0-25 hold {@code 'a'..'z'} and
 * indices 26-51 hold {@code 'A'..'Z'}.
 */
public class Text {

    /** Number of letters in one case of the ASCII alphabet. */
    private static final int ALPHABET_SIZE = 26;

    /** Article analysed when no path is given on the command line. */
    private static final String DEFAULT_ARTICLE =
            "D://article//Harry Potter and the Sorcerer's Stone.txt";

    /** Size of the chunk used while draining the file. */
    private static final int READ_BUFFER_SIZE = 8192;

    /**
     * Prints the total number of letters, then one line per letter
     * (lower case first, then upper case) in the form {@code a:123 4.56%}.
     *
     * @param args optional; {@code args[0]} overrides the default article path
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_ARTICLE;

        char[] charArray = loadCharArrayFromFileName(path);
        if (charArray == null) {
            // The loader already printed the cause; stop instead of crashing on null.
            System.err.println("Could not read file: " + path);
            return;
        }

        int[] freArray = new int[2 * ALPHABET_SIZE];
        computeFrequency(freArray, charArray);

        double num = 0;
        for (int count : freArray) {
            num += count;
        }
        System.out.println(num);

        DecimalFormat df = new DecimalFormat("######0.00");
        for (int i = 0; i < ALPHABET_SIZE; i++) {
            printLetter((char) ('a' + i), freArray[i], num, df);
        }
        for (int i = 0; i < ALPHABET_SIZE; i++) {
            printLetter((char) ('A' + i), freArray[ALPHABET_SIZE + i], num, df);
        }
    }

    /** Prints one {@code letter:count percent%} line. */
    private static void printLetter(char letter, int count, double total, DecimalFormat df) {
        // Multiply in double so large counts cannot overflow int, and avoid
        // dividing by zero when the text contains no letters at all.
        double percent = (total == 0) ? 0.0 : 100.0 * count / total;
        System.out.println(letter + ":" + count + " " + df.format(percent) + "%");
    }

    /**
     * Adds the letter occurrences found in {@code charArray} to {@code freArray}.
     * Characters outside {@code a-z} / {@code A-Z} are ignored.
     *
     * @param freArray  counters to increment; needs at least 52 slots
     *                  (0-25 lower case, 26-51 upper case)
     * @param charArray characters to scan
     */
    public static void computeFrequency(int[] freArray, char[] charArray) {
        for (char c : charArray) {
            if (c >= 'a' && c <= 'z') {
                freArray[c - 'a']++;
            } else if (c >= 'A' && c <= 'Z') {
                freArray[c - 'A' + ALPHABET_SIZE]++;
            }
        }
    }

    /**
     * Reads the whole file into a character array using the platform default
     * charset (as {@link FileReader} does).
     *
     * @param name path of the file to read
     * @return the file's characters, sized exactly to the content, or
     *         {@code null} when the file cannot be opened or read (the stack
     *         trace is printed)
     */
    public static char[] loadCharArrayFromFileName(String name) {
        File file = new File(name);
        StringBuilder text = new StringBuilder();
        char[] buffer = new char[READ_BUFFER_SIZE];

        // try-with-resources closes the reader on every path and, unlike a
        // hand-written finally, does nothing when the reader was never opened.
        try (FileReader fr = new FileReader(file)) {
            int read;
            // A single read() may return fewer characters than requested,
            // so keep reading until end of stream.
            while ((read = fr.read(buffer)) != -1) {
                text.append(buffer, 0, read);
            }
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }

        char[] result = new char[text.length()];
        text.getChars(0, text.length(), result, 0);
        return result;
    }
}
程序运行截图:
获取文章中各个单词个数,并输出前n个最常用单词和所有单词的代码:
/**
 * Counts how often each word occurs in a text file and prints the words
 * ordered from most to least frequent.
 */
public class Test2 {

    /** Any run of non-word characters separates two words. */
    private static final Pattern WORD_SEPARATOR = Pattern.compile("\\W+");

    /**
     * Locates the target file and opens a character input stream on it.
     *
     * @return a reader on the article, or {@code null} when it cannot be
     *         opened (the stack trace is printed)
     */
    public static Reader findFile() {
        File f = new File("D://article//Harry Potter and the Sorcerer's Stone.txt");
        try {
            return new FileReader(f);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Wraps a reader in a buffered reader.
     *
     * @param in source reader; must not be {@code null}
     * @return a {@link BufferedReader} over {@code in}
     */
    public static BufferedReader inputPipe(Reader in) {
        return new BufferedReader(in);
    }

    /**
     * Reads the whole article and closes both readers, even when reading fails.
     *
     * <p>Lines are joined with {@code '\n'} so that the last word of one line
     * and the first word of the next stay separate words.
     *
     * @param br buffered reader to read lines from
     * @param in underlying reader, closed after {@code br}; may be {@code null}
     * @return the text read; empty when there were no lines; on an
     *         {@link IOException} the stack trace is printed and the text read
     *         so far is returned
     */
    public static String readAll(BufferedReader br, Reader in) {
        StringBuilder sb = new StringBuilder();
        // Resources close in reverse declaration order: br first, then in.
        try (Reader source = in; BufferedReader lines = br) {
            boolean first = true;
            String line;
            while ((line = lines.readLine()) != null) {
                if (!first) {
                    sb.append('\n');
                }
                sb.append(line);
                first = false;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return sb.toString();
    }

    /**
     * Counts the words of {@code allwords} into {@code map} and prints every
     * word in {@code map} with its count, most frequent first.
     *
     * @param allwords text to split into words; {@code null} counts as empty
     * @param map      word-to-count table that is updated in place
     */
    public static void spiltAndCount(String allwords, Map<String, Integer> map) {
        spiltAndCount(allwords, map, Integer.MAX_VALUE);
    }

    /**
     * Counts the words of {@code allwords} into {@code map}, prints the number
     * of words found in this text, then prints the {@code topN} most frequent
     * entries of {@code map} as {@code word: count}.
     *
     * <p>Entries with equal counts are ordered by word so the output is
     * repeatable.
     *
     * @param allwords text to split into words; {@code null} counts as empty
     * @param map      word-to-count table that is updated in place
     * @param topN     maximum number of entries to print; values below 1 print none
     */
    public static void spiltAndCount(String allwords, Map<String, Integer> map, int topN) {
        int totalWords = 0;
        if (allwords != null) {
            for (String word : WORD_SEPARATOR.split(allwords)) {
                // split() yields an empty leading token when the text starts
                // with a separator (and for empty input); that is not a word.
                if (word.isEmpty()) {
                    continue;
                }
                map.merge(word, 1, Integer::sum);
                totalWords++;
            }
        }
        System.out.println("总单词数:" + totalWords);

        // Copy the entries into a list so they can be sorted by count.
        List<Map.Entry<String, Integer>> ranked = new ArrayList<>(map.entrySet());
        ranked.sort((a, b) -> {
            int byCount = b.getValue().compareTo(a.getValue());
            return (byCount != 0) ? byCount : a.getKey().compareTo(b.getKey());
        });

        int limit = Math.max(0, Math.min(topN, ranked.size()));
        for (Map.Entry<String, Integer> entry : ranked.subList(0, limit)) {
            System.out.println(entry.getKey() + ": " + entry.getValue());
        }
    }

    /** Reads the article and prints the frequency of every word in it. */
    public static void main(String[] args) {
        Reader in = Test2.findFile();
        if (in == null) {
            // findFile already printed the cause; wrapping null would throw.
            System.err.println("Could not open the article file.");
            return;
        }
        BufferedReader br = Test2.inputPipe(in);
        String allwords = Test2.readAll(br, in);

        Map<String, Integer> map = new HashMap<>();
        Test2.spiltAndCount(allwords, map);
    }
}
程序运行截图: