zoukankan      html  css  js  c++  java
  • 获取文章中的字母个数和单词个数

    获取文章中各个字母个数的代码 

    1 package text1; 
    2 import java.io.File; 
    3 import java.io.FileReader;
     
    4 import java.io.IOException;
     
    5 import  java.text.DecimalFormat;

    6
    /**
     * Counts how often each ASCII letter occurs in a text file and prints, for
     * every letter, its count and its share of all letters as a percentage.
     *
     * <p>Counters are stored in a 52-element array: indexes 0-25 hold 'a'-'z'
     * and indexes 26-51 hold 'A'-'Z'.
     */
    public class Text {

        /** Number of letters in the ASCII alphabet (per case). */
        private static final int ALPHABET = 26;

        /**
         * Loads the hard-coded article, counts its letters and prints the total
         * followed by one line per letter (lowercase first, then uppercase).
         *
         * @param args unused
         */
        public static void main(String[] args) {
            double num = 0;
            int[] freArray = new int[2 * ALPHABET];
            char[] charArray = loadCharArrayFromFileName("D://article//Harry Potter and the Sorcerer's Stone.txt");
            if (charArray == null) {
                // The loader has already reported the I/O failure; nothing to count.
                return;
            }
            computeFrequency(freArray, charArray);
            for (int i = 0; i < freArray.length; i++) {
                num += freArray[i];
            }
            System.out.println(num);
            DecimalFormat df = new DecimalFormat("######0.00");
            for (int i = 0; i < ALPHABET; i++) {
                printLetter((char) ('a' + i), freArray[i], num, df);
            }
            for (int i = ALPHABET; i < freArray.length; i++) {
                printLetter((char) ('A' + i - ALPHABET), freArray[i], num, df);
            }
        }

        /** Prints one "letter:count percent%" line; the share is 0 when no letters were found. */
        private static void printLetter(char letter, int count, double total, DecimalFormat df) {
            // Guard against 0/0, which would otherwise format as NaN.
            double percent = total == 0 ? 0 : (100.0 * count) / total;
            System.out.println(letter + ":" + count + " " + df.format(percent) + "%");
        }

        /**
         * Adds the letter occurrences found in {@code charArray} to {@code freArray}.
         *
         * @param freArray  counters to increment; must have at least 52 elements
         *                  ('a'-'z' at 0-25, 'A'-'Z' at 26-51)
         * @param charArray characters to scan; non-letter characters are ignored
         */
        public static void computeFrequency(int[] freArray, char[] charArray) {
            for (char c : charArray) {
                if (c >= 'A' && c <= 'Z') {
                    freArray[c - 'A' + ALPHABET]++;
                } else if (c >= 'a' && c <= 'z') {
                    freArray[c - 'a']++;
                }
            }
        }

        /**
         * Reads the whole file into a character array using the platform default
         * charset.
         *
         * <p>The returned array has exactly as many elements as characters were
         * read; files of any size are read completely.
         *
         * @param name path of the file to read
         * @return the file's characters, or {@code null} if the file could not be
         *         opened or read (the exception's stack trace is printed)
         */
        public static char[] loadCharArrayFromFileName(String name) {
            StringBuilder text = new StringBuilder();
            char[] buffer = new char[8192];
            // try-with-resources closes the reader on every path and never
            // touches it when opening failed.
            try (FileReader fr = new FileReader(new File(name))) {
                int read;
                // A single read() may return fewer characters than requested,
                // so keep reading until end of stream.
                while ((read = fr.read(buffer)) != -1) {
                    text.append(buffer, 0, read);
                }
            } catch (IOException e) {
                e.printStackTrace();
                return null;
            }
            char[] result = new char[text.length()];
            text.getChars(0, text.length(), result, 0);
            return result;
        }
    }

    程序运行截图:

    获取文章中各个单词个数,并输出前n个最常用单词和所有单词的代码:

      1 package text1;
      2 
      3 import java.io.*;
      4 import java.util.ArrayList;
      5 import java.util.Collections;
      6 import java.util.Comparator;
      7 import java.util.Date;
      8 import java.util.HashMap;
      9 import java.util.List;
     10 import java.util.Map;
     11 import java.util.Scanner;
     12 import java.util.Set;
     13 import java.util.TreeMap;
     14 import java.util.stream.Collectors;
     15 
     16 
     /**
      * Counts how often each word occurs in a text file and prints the total
      * number of words followed by every distinct word with its count, most
      * frequent first.
      */
     public class Test2 {

         /**
          * Locates the target file and opens a character input stream on it.
          *
          * @return a reader on the hard-coded article, or {@code null} if the file
          *         could not be opened (the exception's stack trace is printed)
          */
         public static Reader findFile() {
             File f = new File("D://article//Harry Potter and the Sorcerer's Stone.txt");
             Reader in = null;
             try {
                 in = new FileReader(f);
             } catch (IOException e) {
                 e.printStackTrace();
             }
             return in;
         }

         /**
          * Wraps a reader in a buffering reader.
          *
          * @param in the reader to buffer
          * @return a {@link BufferedReader} reading from {@code in}
          */
         public static BufferedReader inputPipe(Reader in) {
             return new BufferedReader(in);
         }

         /**
          * Reads the whole article and closes both readers afterwards, whether or
          * not reading succeeded.
          *
          * <p>Each line is followed by a newline in the result so that the last
          * word of one line is not glued to the first word of the next.
          *
          * @param br buffered reader to read lines from; must not be {@code null}
          * @param in the underlying reader, closed after {@code br}
          * @return the text read; empty if the input was empty. If an
          *         {@link IOException} occurs its stack trace is printed and the
          *         text read so far is returned.
          */
         public static String readAll(BufferedReader br, Reader in) {
             StringBuilder sb = new StringBuilder();
             // Resources are closed in reverse order: br first, then in.
             try (Reader source = in; BufferedReader reader = br) {
                 String line;
                 while ((line = reader.readLine()) != null) {
                     // readLine() strips the terminator; restore a separator.
                     sb.append(line).append('\n');
                 }
             } catch (IOException e) {
                 e.printStackTrace();
             }
             return sb.toString();
         }

         /**
          * Splits the text into words, adds their occurrences to {@code map} and
          * prints the word total followed by every entry of {@code map} ordered by
          * descending count.
          *
          * <p>Words are the non-empty pieces between runs of non-word characters
          * (regex {@code \W+}); matching is case-sensitive.
          *
          * @param allwords text to analyse; {@code null} is treated as empty
          * @param map      word counters to update; existing entries are kept and
          *                 incremented
          */
         public static void spiltAndCount(String allwords, Map<String, Integer> map) {
             String text = allwords == null ? "" : allwords;
             String regex = "\\W+";
             String[] words = text.split(regex); // cut the text into words
             int total = 0;
             for (String word : words) {
                 if (word.isEmpty()) {
                     // split() yields an empty first piece when the text starts
                     // with a separator (or is empty); that is not a word.
                     continue;
                 }
                 total++;
                 map.merge(word, 1, Integer::sum);
             }

             System.out.println("总单词数:" + total);
             List<Map.Entry<String, Integer>> list = new ArrayList<>(map.entrySet());
             // Sort by count, highest first.
             Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
                 @Override
                 public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
                     return o2.getValue().compareTo(o1.getValue());
                 }
             });
             // Every distinct word is printed; limit this loop to show only the top n.
             for (Map.Entry<String, Integer> entry : list) {
                 System.out.println(entry.getKey() + ": " + entry.getValue());
             }
         }

         /**
          * Reads the hard-coded article and prints its word statistics.
          *
          * @param args unused
          */
         public static void main(String[] args) {
             Reader in = Test2.findFile();
             if (in == null) {
                 // findFile() has already reported why the file could not be opened.
                 return;
             }
             BufferedReader br = Test2.inputPipe(in);
             String allwords = Test2.readAll(br, in);
             Map<String, Integer> map = new HashMap<>();
             Test2.spiltAndCount(allwords, map);
         }
     }

      程序运行截图:

  • 相关阅读:
    java程序高CPU,如何直接定位(linux系统下命令行操作)
    HashMap源码解读(jdk1.8)
    mysql索引原理
    redis常见问题
    Bitmap算法
    list遍历时删除的坑
    接口和抽象类有什么区别?各在什么场景下使用?
    记录BigInteger犯过的一个错误
    CentOS7切换源
    map遍历性能记录
  • 原文地址:https://www.cnblogs.com/wendi/p/11805178.html
Copyright © 2011-2022 走看看