#1014 : Trie树
在20%的数据中n, m<=10,词典的字母表大小<=2.
在60%的数据中n, m<=1000,词典的字母表大小<=5.
在100%的数据中n, m<=100000,词典的字母表大小<=26.
- 样例输入
5 babaab babbbaaaa abba aaaaabaa babaababb 5 babb baabaaa bab bb bbabbaab
- 样例输出
1 0 3 0 0
// Note: what originally confused me was whether an array field is == null right after
// `new`. It is not: `new TreeNode()` allocates the `child` array, so `root.child != null`,
// while every individual slot (e.g. `root.child[1]`) is still null until a node is stored.
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * hihoCoder #1014: build a trie from a dictionary and, for each query, print how many
 * dictionary words start with that query.
 *
 * Input format (line based): a word count n, n words, a query count m, m queries.
 * The block may repeat until end of input; all cases share the same trie.
 * Words are expected to consist of the letters 'a'..'z' only; any other character indexes
 * outside the 26-slot child array and raises ArrayIndexOutOfBoundsException.
 */
public class TrieTree {

    /**
     * Reads the test cases from standard input and writes one count per query line.
     *
     * Blank lines around the numeric headers are skipped and a premature end of input
     * simply ends processing, instead of crashing in Integer.parseInt or on a null line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
        // Answers are collected and written once: with up to 100000 queries a println per
        // answer is needlessly slow.
        StringBuilder out = new StringBuilder();
        TreeNode root = new TreeNode();
        try {
            String header;
            while ((header = nextNonBlankLine(br)) != null) {
                int n = Integer.parseInt(header);
                for (int i = 0; i < n; i++) {
                    String word = br.readLine();
                    if (word == null) {
                        break; // input ended in the middle of the dictionary
                    }
                    insert(root, word);
                }

                String countLine = nextNonBlankLine(br);
                if (countLine == null) {
                    break; // no query section present
                }
                int m = Integer.parseInt(countLine);
                for (int i = 0; i < m; i++) {
                    String prefix = br.readLine();
                    if (prefix == null) {
                        break; // input ended in the middle of the queries
                    }
                    out.append(countPrefix(root, prefix)).append(System.lineSeparator());
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Emit whatever was answered, even if reading failed part-way through.
            System.out.print(out);
            System.out.flush();
        }
    }

    /** Returns the next line that is not blank (trimmed), or null at end of input. */
    private static String nextNonBlankLine(BufferedReader br) throws IOException {
        String line;
        while ((line = br.readLine()) != null) {
            String trimmed = line.trim();
            if (!trimmed.isEmpty()) {
                return trimmed;
            }
        }
        return null;
    }

    /** Adds word to the trie, incrementing num on every node along its path. */
    private static void insert(TreeNode root, String word) {
        TreeNode p = root;
        for (int k = 0; k < word.length(); k++) {
            int slot = word.charAt(k) - 'a';
            if (p.child[slot] == null) {
                p.child[slot] = new TreeNode();
            }
            p = p.child[slot];
            p.num++;
        }
    }

    /** Returns how many inserted words start with prefix; 0 for an empty or unknown prefix. */
    private static int countPrefix(TreeNode root, String prefix) {
        TreeNode p = root;
        int ret = 0;
        for (int k = 0; k < prefix.length(); k++) {
            TreeNode next = p.child[prefix.charAt(k) - 'a'];
            if (next == null) {
                return 0;
            }
            ret = next.num;
            p = next;
        }
        return ret;
    }
}

/** Trie node: num is the number of inserted words whose path passes through this node. */
class TreeNode {
    int num;
    // Allocated eagerly, so the array itself is never null; its 26 slots start out null.
    TreeNode child[] = new TreeNode[26];

    TreeNode() {
        num = 0;
    }
}
package Trie树及其应用;

/*
 * Building a trie (prefix tree).
 */
import java.util.HashMap;

/**
 * A trie node whose children are keyed by character. The node on which the public methods
 * are called acts as the root of the trie.
 *
 * Three independent usages share the same child structure:
 * plain insertion, prefix-conflict detection (wordEnd), and prefix counting (count).
 * All operations walk the trie iteratively by index, so no substring copies are made and
 * the call stack does not grow with the word length.
 */
public class TrieTreeNode {
    private HashMap<Character, TrieTreeNode> children = new HashMap<Character, TrieTreeNode>();

    // Used by insertAndJudgePrefix: true when a word ends at this node.
    private boolean wordEnd = false;

    // Used by insertAndCount/getCount: number of counted words whose path passes through this node.
    private int count = 0;

    /**
     * Plain insertion: creates the path for word without marking a word end or counting.
     *
     * @param word the word to insert; null or empty is ignored
     */
    public void insert(String word) {
        if (word == null || word.length() == 0) {
            return;
        }
        TrieTreeNode node = this;
        for (int i = 0; i < word.length(); i++) {
            node = node.childOrCreate(word.charAt(i));
        }
    }

    /**
     * Inserts word and reports whether it conflicts with the trie's existing content: either
     * an already marked word end lies on word's path, or word ends on a node that already
     * existed.
     *
     * When a marked word end is met, the method returns immediately and the remainder of
     * word is not inserted.
     *
     * @param word the word to insert; null or empty returns false
     * @return true if a prefix relationship was detected, false otherwise
     */
    public boolean insertAndJudgePrefix(String word) {
        if (word == null || word.length() == 0) {
            return false;
        }
        TrieTreeNode node = this;
        int last = word.length() - 1;
        for (int i = 0; i <= last; i++) {
            char c = word.charAt(i);
            TrieTreeNode child = node.children.get(c);
            if (child != null) {
                if (child.wordEnd) {
                    return true; // a marked word ends on this path
                }
                if (i == last) {
                    child.wordEnd = true;
                    return true; // word ends on a node that already existed
                }
            } else {
                child = new TrieTreeNode();
                node.children.put(c, child);
                if (i == last) {
                    child.wordEnd = true;
                    return false; // word finished on a freshly created node
                }
            }
            node = child;
        }
        return false; // not reached: the loop always returns at i == last
    }

    /**
     * Inserts word and increments the counter of every node along its path.
     *
     * @param word the word to insert; null or empty is ignored
     */
    public void insertAndCount(String word) {
        if (word == null || word.length() == 0) {
            return;
        }
        TrieTreeNode node = this;
        for (int i = 0; i < word.length(); i++) {
            node = node.childOrCreate(word.charAt(i));
            node.count++;
        }
    }

    /**
     * Returns how many words inserted through insertAndCount start with pre.
     *
     * @param pre the prefix to look up
     * @return the count, or 0 when pre is null, empty, or not present in the trie
     */
    public int getCount(String pre) {
        // Guard added: the lookup used to call charAt(0) unconditionally.
        if (pre == null || pre.length() == 0) {
            return 0;
        }
        TrieTreeNode node = this;
        for (int i = 0; i < pre.length(); i++) {
            node = node.children.get(pre.charAt(i));
            if (node == null) {
                return 0;
            }
        }
        return node.count;
    }

    /** Returns the child for c, creating and linking it first when absent. */
    private TrieTreeNode childOrCreate(char c) {
        TrieTreeNode child = children.get(c);
        if (child == null) {
            child = new TrieTreeNode();
            children.put(c, child);
        }
        return child;
    }
}
2016/12/08 做了leetcode 上面的题,再次更新
1.用数组实现(递归),比较简单而且更优化 wordEnd = true表示一个单词的结束。当一个单词结束时,这条边对应的子节点wordEnd = true
/**
 * Array-backed trie node for lowercase words: slot i of children holds the subtree for
 * the letter 'a' + i, and wordEnd marks that an inserted word ends at this node.
 */
class TrieNode {
    private TrieNode[] children = new TrieNode[26];
    boolean wordEnd = false;

    public TrieNode() {
    }

    /**
     * Inserts the suffix of word that starts at index below this node, creating missing
     * nodes on the way, and marks the final node as a word end.
     */
    public void insert(String word, int index) {
        TrieNode node = this;
        for (int i = index; i != word.length(); i++) {
            int slot = word.charAt(i) - 'a';
            if (node.children[slot] == null) {
                node.children[slot] = new TrieNode();
            }
            node = node.children[slot];
        }
        node.wordEnd = true;
    }

    /** Returns true when the suffix of word from index leads to a node marked as a word end. */
    public boolean search(String word, int index) {
        TrieNode node = this;
        for (int i = index; i != word.length(); i++) {
            node = node.children[word.charAt(i) - 'a'];
            if (node == null) {
                return false;
            }
        }
        return node.wordEnd;
    }

    /** Returns true when the suffix of word from index can be followed completely in the trie. */
    public boolean startsWith(String word, int index) {
        TrieNode node = this;
        for (int i = index; i != word.length(); i++) {
            node = node.children[word.charAt(i) - 'a'];
            if (node == null) {
                return false;
            }
        }
        return true;
    }
}

/** Trie facade: rejects null/empty arguments and delegates to the root node from index 0. */
public class Trie {
    private TrieNode root;

    public Trie() {
        root = new TrieNode();
    }

    /** Inserts a word into the trie; null or empty words are ignored. */
    public void insert(String word) {
        if (!isNullOrEmpty(word)) {
            root.insert(word, 0);
        }
    }

    /** Returns whether the word is in the trie; null or empty yields false. */
    public boolean search(String word) {
        return !isNullOrEmpty(word) && root.search(word, 0);
    }

    /**
     * Returns whether any word in the trie starts with the given prefix;
     * null or empty yields false.
     */
    public boolean startsWith(String prefix) {
        return !isNullOrEmpty(prefix) && root.startsWith(prefix, 0);
    }

    private static boolean isNullOrEmpty(String s) {
        return s == null || s.length() == 0;
    }
}

// Your Trie object will be instantiated and called as such:
// Trie trie = new Trie();
// trie.insert("somestring");
// trie.search("key");
/**
 * Map-backed trie node: children maps a character to its subtree, and wordEnd marks that
 * an inserted word ends at this node.
 */
class TrieNode {
    Map<Character, TrieNode> children = null;
    boolean wordEnd = false;

    public TrieNode() {
        children = new HashMap<>();
    }

    /** Inserts word below this node, creating missing nodes, and marks the final node as a word end. */
    public void insert(String word) {
        TrieNode node = this;
        for (char c : word.toCharArray()) {
            TrieNode next = node.children.get(c);
            if (next == null) {
                next = new TrieNode();
                node.children.put(c, next);
            }
            node = next;
        }
        node.wordEnd = true;
    }

    /** Returns whether word leads from this node to a node marked as a word end. */
    public boolean search(String word) {
        TrieNode node = this;
        for (char c : word.toCharArray()) {
            node = node.children.get(c);
            if (node == null) {
                return false;
            }
        }
        return node.wordEnd;
    }

    /** Returns whether word can be followed completely starting from this node. */
    public boolean startsWith(String word) {
        TrieNode node = this;
        for (char c : word.toCharArray()) {
            node = node.children.get(c);
            if (node == null) {
                return false;
            }
        }
        return true;
    }
}

/** Trie facade: rejects null/empty arguments and delegates everything else to the root node. */
public class Trie {
    private TrieNode root;

    public Trie() {
        root = new TrieNode();
    }

    /** Inserts a word into the trie; null or empty words are ignored. */
    public void insert(String word) {
        if (!isNullOrEmpty(word)) {
            root.insert(word);
        }
    }

    /** Returns whether the word is in the trie; null or empty yields false. */
    public boolean search(String word) {
        return !isNullOrEmpty(word) && root.search(word);
    }

    /**
     * Returns whether any word in the trie starts with the given prefix;
     * null or empty yields false.
     */
    public boolean startsWith(String prefix) {
        return !isNullOrEmpty(prefix) && root.startsWith(prefix);
    }

    private static boolean isNullOrEmpty(String s) {
        return s == null || s.length() == 0;
    }
}

// Your Trie object will be instantiated and called as such:
// Trie trie = new Trie();
// trie.insert("somestring");
// trie.search("key");
1. 给定一组单词,判断其中是否存在一个单词是另一个的前缀
package Trie树及其应用;

/*
 * Problem: given a set of distinct words, decide whether one of them is a prefix of
 * another. Return true if such a pair exists, false otherwise.
 * Convention: null or the empty string counts as a prefix of every string.
 * The two detection points (implemented in TrieTreeNode.insertAndJudgePrefix):
 * 1. the word walks entirely along another word's path without creating a node of its own;
 * 2. the word creates its own nodes only after walking through a complete word.
 */
public class PrefixJudge {

    /**
     * Inserts the words one by one into a fresh trie and stops at the first conflict.
     *
     * @param words the words to check
     * @return false for a null or empty array; true as soon as a null/empty word is seen
     *         or TrieTreeNode.insertAndJudgePrefix reports a prefix; false otherwise
     */
    public boolean hasPrefix(String[] words) {
        if (words == null || words.length == 0) {
            return false;
        }
        TrieTreeNode root = new TrieTreeNode();
        for (int i = 0; i < words.length; i++) {
            String current = words[i];
            boolean emptyWord = current == null || current.isEmpty();
            if (emptyWord || root.insertAndJudgePrefix(current)) {
                return true;
            }
        }
        return false;
    }

    /** Small demo run; the result of hasPrefix is computed but not printed. */
    public static void main(String[] args) {
        String[] words = {"abcde","bcd","abf","bcd"};
        new PrefixJudge().hasPrefix(words);
    }
}
package Trie树及其应用;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

/*
 * Problem: for each query, report how many dictionary words start with that prefix.
 */
/*
 * Solution:
 * While building the dictionary, every node keeps a count of the words whose path passes
 * through it; each node visited during an insertion has its count incremented.
 * To answer a query, walk to the last character of the prefix and return that node's count.
 */
public class HiHo1014 {

    /**
     * Reads the test cases from standard input (word count n, n words, query count m,
     * m queries; repeated until end of input) and writes one count per query line.
     * All cases share the same trie.
     *
     * Blank lines around the numeric headers are skipped and a premature end of input
     * simply ends processing, instead of crashing in Integer.parseInt.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
        // Answers are collected and written once instead of one println per query.
        StringBuilder out = new StringBuilder();
        TrieTreeNode root = new TrieTreeNode();
        try {
            String header;
            while ((header = nextNonBlankLine(br)) != null) {
                int n = Integer.parseInt(header);
                for (int i = 0; i < n; i++) {
                    String word = br.readLine();
                    if (word == null) {
                        break; // input ended in the middle of the dictionary
                    }
                    root.insertAndCount(word);
                }

                String countLine = nextNonBlankLine(br);
                if (countLine == null) {
                    break; // no query section present
                }
                int m = Integer.parseInt(countLine);
                for (int i = 0; i < m; i++) {
                    String prefix = br.readLine();
                    if (prefix == null) {
                        break; // input ended in the middle of the queries
                    }
                    // An empty query is answered with 0 here, without consulting the trie.
                    int answer = prefix.length() == 0 ? 0 : root.getCount(prefix);
                    out.append(answer).append(System.lineSeparator());
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Emit whatever was answered, even if reading failed part-way through.
            System.out.print(out);
            System.out.flush();
        }
    }

    /** Returns the next line that is not blank (trimmed), or null at end of input. */
    private static String nextNonBlankLine(BufferedReader br) throws IOException {
        String line;
        while ((line = br.readLine()) != null) {
            String trimmed = line.trim();
            if (!trimmed.isEmpty()) {
                return trimmed;
            }
        }
        return null;
    }
}
原题 :搜索引擎会通过日志文件把用户每次检索使用的所有检索串都记录下来,每个查询串的长度为1-255字节。假设目前有一千万个记录,这些查询串的重复度比较高,虽然总数是1千万,但是如果去除重复后,不超过3百万个。一个查询串的重复度越高,说明查询它的用户越多,也就越热门。请你统计最热门的10个查询串,要求使用的内存不能超过1G。
提示 :利用trie树,关键字域存该查询串出现的次数,没有出现为0。最后用10个元素的最小堆来对出现频率进行排序。
提示 :用trie树统计每个词出现的次数,时间复杂度是O(n*le)(le表示单词的平均长度),然后是找出出现最频繁的前10个词。当然,也可以用堆来实现,时间复杂度是O(n*lg10)。所以总的时间复杂度,是O(n*le)与O(n*lg10)中较大的那一个。