zoukankan      html  css  js  c++  java
  • 读取JDK API文档,并根据单词出现频率排序

    1,拿到 API 文档

    登录 https://docs.oracle.com/javase/8/docs/api/

    选中特定的类,然后 copy 其中的内容,

    放入 TXT 文件中(本例中文件路径为 src/Vector.txt)。

    2,读取TXT内容,并排序

    package com.lgx.test;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.FileReader;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Comparator;
    import java.util.List;
    import java.util.Map;
    import java.util.Map.Entry;
    import java.util.Set;
    import java.util.TreeMap;
    
    /**
     * Counts how often each word occurs in the text file named by {@link #filename}
     * and prints the result to standard output, either ordered alphabetically by
     * word or ordered by descending frequency.
     *
     * <p>Words are compared case-insensitively (every token is lower-cased) and are
     * delimited by whitespace and the punctuation characters {@code . , ; : ! ? ( ) { }}.
     */
    public class ReadVectorAPI {

    	/** Path of the text file to analyse; read each time a counting method is called. */
    	public static String filename = "src/Vector.txt";

    	/** Not used by this class; retained so existing references to it still compile. */
    	public static StringBuffer sb = null;

    	/** Regex character class listing every character that separates two words. */
    	private static final String WORD_DELIMITERS = "[ \n\t\r.,;:!?(){}]";

    	/**
    	 * Prints the word counts twice: first ordered by word, then ordered by frequency.
    	 *
    	 * @param args ignored
    	 */
    	public static void main(String[] args) {
    		try {
    			// Ordered alphabetically by word.
    			countWordOrderByWord();
    			// Ordered by descending frequency.
    			countWordOrderByCount();
    		} catch (FileNotFoundException e) {
    			e.printStackTrace();
    		}
    	}

    	/**
    	 * Counts the words in {@link #filename} and prints one line per distinct word,
    	 * in ascending (natural {@code String}) order of the word.
    	 *
    	 * <p>Any {@link IOException} raised while opening or reading the file, including a
    	 * missing file, is reported with {@code printStackTrace()} and nothing else is
    	 * printed.
    	 *
    	 * @throws FileNotFoundException declared for backward compatibility only; a missing
    	 *         file is handled inside this method as described above
    	 */
    	public static void countWordOrderByWord() throws FileNotFoundException {
    		TreeMap<String, Integer> counts;
    		try {
    			counts = countWords();
    		} catch (IOException e) {
    			e.printStackTrace();
    			return;
    		}

    		System.out.println("=====根据单词字母排序=====");
    		// A TreeMap iterates in key order, so no explicit sort is needed here.
    		for (Map.Entry<String, Integer> entry : counts.entrySet()) {
    			System.out.println(entry.getKey() + " 在API文档中出现了 " + entry.getValue() + " 次");
    		}
    	}

    	/**
    	 * Counts the words in {@link #filename} and prints one line per distinct word,
    	 * most frequent first. Words with the same count keep their alphabetical order
    	 * because the entries start out sorted by word and {@code Collections.sort} is stable.
    	 *
    	 * <p>Any {@link IOException} raised while opening or reading the file, including a
    	 * missing file, is reported with {@code printStackTrace()} and nothing else is
    	 * printed.
    	 *
    	 * @throws FileNotFoundException declared for backward compatibility only; a missing
    	 *         file is handled inside this method as described above
    	 */
    	public static void countWordOrderByCount() throws FileNotFoundException {
    		TreeMap<String, Integer> counts;
    		try {
    			counts = countWords();
    		} catch (IOException e) {
    			e.printStackTrace();
    			return;
    		}

    		List<Map.Entry<String, Integer>> byFrequency =
    				new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
    		Collections.sort(byFrequency, new Comparator<Map.Entry<String, Integer>>() {
    			@Override
    			public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {
    				// Arguments reversed on purpose: larger counts sort first.
    				return o2.getValue().compareTo(o1.getValue());
    			}
    		});

    		System.out.println("=====根据单词频率排序=====");
    		for (Map.Entry<String, Integer> entry : byFrequency) {
    			System.out.println(entry.getKey() + " 在API文档中出现了 " + entry.getValue() + " 次");
    		}
    	}

    	/**
    	 * Reads {@link #filename} line by line and tallies every non-empty, lower-cased token.
    	 *
    	 * <p>The file is decoded by {@link FileReader}, i.e. with the charset that class
    	 * selects by default. The reader is always closed, whether reading succeeds or fails.
    	 *
    	 * @return a map from lower-cased word to its number of occurrences, sorted by word
    	 * @throws IOException if the file cannot be opened or read
    	 */
    	private static TreeMap<String, Integer> countWords() throws IOException {
    		TreeMap<String, Integer> counts = new TreeMap<String, Integer>();
    		try (BufferedReader reader = new BufferedReader(new FileReader(new File(filename)))) {
    			String line;
    			while ((line = reader.readLine()) != null) {
    				for (String token : line.split(WORD_DELIMITERS)) {
    					String key = token.toLowerCase();
    					// Adjacent delimiters produce empty tokens; they are not words.
    					if (key.length() == 0) {
    						continue;
    					}
    					Integer seen = counts.get(key);
    					counts.put(key, seen == null ? 1 : seen + 1);
    				}
    			}
    		}
    		return counts;
    	}
    }
    

    输出结果的前一部分如下(原文此处为截图):

  • 相关阅读:
    弗尤博客(二)
    弗尤博客(一)
    第一系列完
    C# 关闭子窗体释放子窗体对象问题
    C#设置IE代理
    C# 计算位置居中
    C# 绘图
    From传值
    pictureBox绑定Base64字符串
    C# 绘制圆角矩形
  • 原文地址:https://www.cnblogs.com/lgx211/p/10181737.html
Copyright © 2011-2022 走看看