zoukankan      html  css  js  c++  java
  • 字典树 找出单词文件中出现次数前十的单词

    import java.io.*;
    import java.util.*;
    
    /**
     * One node of a trie over the lowercase letters a-z. Words are lower-cased
     * before they are stored or looked up, so matching is case-insensitive.
     */
    class TrieNode {
        /** The character this node was created for. */
        char currentChar;

        /** Number of times a word was recorded as ending at this node. */
        int wordCount;

        /** Number of times this node's letter was passed through during insertion. */
        int count;

        /** Child slots: index 0 holds 'a', index 1 holds 'b', ... index 25 holds 'z'. */
        TrieNode[] next;

        /**
         * Creates a node with zeroed counters and 26 empty child slots.
         *
         * @param word the character this node stands for
         */
        public TrieNode(char word) {
            currentChar = word;
            wordCount = 0;
            count = 0;
            next = new TrieNode[26];
        }
    }
    
    public class Main {
    
        public static void main(String[] args) throws IOException{
            //读取文件(字符流)
            BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream("E:\word.txt"),"GBK"));// 这里主要是涉及中文
            //读取数据
            //循环取出数据
            String str = null;
            TrieNode root = new TrieNode('0'); // 初始化根节点
            while ((str = in.readLine()) != null) {
                insert(root,str);
            }
    
    
            // 查找单词出现的次数
           // int count = query(root, "world");
           // System.out.println(count);
            // 查找单词出现的次数最多的10个
            Map<String,Integer> map = new TreeMap<String, Integer>();// 默认按照key 升序排序
            in = new BufferedReader(new InputStreamReader(new FileInputStream("E:\word.txt"),"GBK"));
            while ((str = in.readLine()) != null) {
                int query = query(root, str);
                map.put(str,query);
            }
            // 按值排序  降序排序
            List<Map.Entry<String, Integer>> list = new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
            Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
                @Override
                public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
                    return -o1.getValue().compareTo(o2.getValue());
                }
            });
            int i=0;
            for (Map.Entry<String, Integer> mapping : list) {
                if(i++ == 10) {
                    break;
                }
                System.out.println("单词:" + mapping.getKey() + " 出现:" + mapping.getValue());
            }
    
            //关闭流
            in.close();
    
    
        }
    
        public static  void insert(TrieNode root,String str) {
            str = str.toLowerCase();
            char[] chars = str.toCharArray();
            for(int i=0;i<chars.length;i++) {
                int index = chars[i]-'a';
                TrieNode[] next = root.next;
                if(next[index] == null) {// 节点为空
                   // System.out.print("插入"+ chars[i] +";");
                    next[index] = new TrieNode(chars[i]);
                }
                next[index].count++;
                root = next[index];
            }
            root.wordCount++;
           // System.out.println();
        }
    
        //查询字符串是否存在,不存在返回0 ,存在返回个数
        public static int query(TrieNode root,String str) {
            if(root == null) {
                return 0;
            }
            str = str.toLowerCase();
            char[] chars = str.toCharArray();
            for(int i=0;i<chars.length;i++) {
                TrieNode[] next = root.next;
                int index = chars[i]-'a';
                if(next[index] == null) {
                    return 0;
                }else {
                    root = next[index];
                }
            }
            return root.wordCount;
        }
    
    
    }
    

      

  • 相关阅读:
    什么是内卷?
    iphone与PC端如何传BUG截图
    java应用服务占用cpu过高,如何优化
    性能测试常见问题FAQ
    性能测试工程师能力进阶三部曲
    jmeter分布式压测试部署
    了解token及分类
    常见端口号及其服务
    2714
    python
  • 原文地址:https://www.cnblogs.com/lick468/p/11580788.html
Copyright © 2011-2022 走看看