zoukankan      html  css  js  c++  java
  • Trie tree实践

    1、Trie树

    Trie树即字典树或前缀树,它利用字符串的公共前缀来减少存储空间并加快检索速度。

    2、实践

    代码实践如下:

      1 package cn.edu.buaa.trie;
      2 
      import java.util.HashMap;
      import java.util.HashSet;
      4 
      /**
       * A trie (prefix tree) over words made of the lowercase letters {@code 'a'}..{@code 'z'}.
       * <p>
       * Every stored occurrence of a word is tagged with a caller-supplied id. Each node keeps
       * the set of ids of the words passing through it, so all ids under a prefix can be read
       * from a single node. The same word may be stored several times, and the same id may be
       * used for several words; per-id reference counts on the nodes keep deletion exact in
       * both cases.
       * <p>
       * This class is not synchronized.
       *
       * @author zsm
       * @date 2016-10-25 11:03:13
       * @version 1.0
       */
      public class Trie {
          private TrieNode trieRoot;
          private int treeSize;

          /** Creates an empty trie consisting only of the character-less root node. */
          public Trie() {
              trieRoot = new TrieNode();
              treeSize = 0;
          }

          /** @return the root node, which holds no character itself */
          public TrieNode getRoot() {
              return trieRoot;
          }

          /** @return the number of nodes currently allocated, not counting the root */
          public int getTreeSize() {
              return treeSize;
          }

          /**
           * Adds one occurrence of {@code word}, tagged with {@code wordId}.
           *
           * @param word   the word to store; {@code null} or empty is ignored
           * @param wordId the id recorded on every node along the word's path
           * @throws IllegalArgumentException if the word contains a character outside
           *                                  {@code 'a'}..{@code 'z'}
           */
          public void addWord(String word, int wordId) {
              addWord(trieRoot, word, wordId);
          }

          /**
           * Adds one occurrence of {@code word} below {@code root}.
           * <p>
           * The word is validated before anything is modified, so a rejected word never leaves
           * a half-inserted path behind.
           *
           * @param root   the node to insert below (normally {@link #getRoot()})
           * @param word   the word to store; {@code null} or empty is ignored
           * @param wordId the id recorded on every node along the word's path
           * @throws IllegalArgumentException if the word contains a character outside
           *                                  {@code 'a'}..{@code 'z'}
           */
          public void addWord(TrieNode root, String word, int wordId) {
              // Nothing to store for a missing or empty word.
              if (null == word || word.length() == 0) {
                  return;
              }
              requireSupported(word);

              TrieNode node = root;
              for (int i = 0; i < word.length(); i++) {
                  char ch = word.charAt(i);
                  int k = TrieNode.getCharPosition(ch);
                  TrieNode child = node.childNodes[k];
                  // First word to use this edge: allocate the child node.
                  if (child == null) {
                      child = new TrieNode();
                      child.nodeChar = ch;
                      node.childNodes[k] = child;
                      treeSize++;
                  }
                  // Record that one more occurrence with this id passes through the child.
                  child.wordSet.add(wordId);
                  increment(child.idCounts, wordId);
                  node = child;
              }
              // The last node visited is where the word ends.
              node.freq++;
              increment(node.endIdCounts, wordId);
          }

          /**
           * Removes one occurrence of {@code word} that was stored with {@code wordId}.
           * Does nothing if no such occurrence exists.
           *
           * @param word   the word to remove
           * @param wordId the id the occurrence was stored with
           */
          public void deleteWord(String word, int wordId) {
              deleteWord(trieRoot, word, wordId);
          }

          /** Outcome of a delete request. */
          enum DELETERES {
              FAIL_EMPTYWORLD, FAIL_WORLD_NOT_EXIST, SUCCESS
          }

          /**
           * Removes one occurrence of {@code word} stored with {@code wordId} below {@code root}.
           * <p>
           * The occurrence is located first and the trie is only modified once it is known to
           * exist. An id is dropped from a node's id set only when no remaining occurrence with
           * that id passes through the node, and a node is released only when nothing passes
           * through it any more, so words sharing a prefix or an id are left intact.
           *
           * @param root   the node to search below (normally {@link #getRoot()})
           * @param word   the word to remove
           * @param wordId the id the occurrence was stored with
           * @return {@code FAIL_EMPTYWORLD} for a {@code null} or empty word,
           *         {@code FAIL_WORLD_NOT_EXIST} if no occurrence of the word with that id is
           *         stored, {@code SUCCESS} otherwise
           */
          public DELETERES deleteWord(TrieNode root, String word, int wordId) {
              if (null == word || word.length() == 0) {
                  return DELETERES.FAIL_EMPTYWORLD;
              }

              // path[i] is the node reached after consuming the first i characters.
              TrieNode[] path = new TrieNode[word.length() + 1];
              path[0] = root;
              for (int i = 0; i < word.length(); i++) {
                  char ch = word.charAt(i);
                  // An unsupported character can never have been stored.
                  if (!TrieNode.isSupportedChar(ch)) {
                      return DELETERES.FAIL_WORLD_NOT_EXIST;
                  }
                  TrieNode child = path[i].childNodes[TrieNode.getCharPosition(ch)];
                  if (child == null) {
                      return DELETERES.FAIL_WORLD_NOT_EXIST;
                  }
                  path[i + 1] = child;
              }

              TrieNode last = path[word.length()];
              // The path may exist only as a prefix of longer words, or the word may have
              // been stored under different ids only.
              if (last.freq <= 0 || !last.endIdCounts.containsKey(wordId)) {
                  return DELETERES.FAIL_WORLD_NOT_EXIST;
              }
              last.freq--;
              decrement(last.endIdCounts, wordId);

              // Walk back towards the root so children are released before their parents.
              for (int i = word.length(); i >= 1; i--) {
                  TrieNode node = path[i];
                  if (decrement(node.idCounts, wordId) == 0) {
                      node.wordSet.remove(wordId);
                  }
                  if (node.idCounts.isEmpty() && !hasChildren(node)) {
                      path[i - 1].childNodes[TrieNode.getCharPosition(word.charAt(i - 1))] = null;
                      treeSize--;
                  }
              }
              return DELETERES.SUCCESS;
          }

          /**
           * Replaces one occurrence of {@code oldWord} (stored with {@code wordId}) by
           * {@code newWord} under the same id. Nothing is added if the old occurrence does not
           * exist.
           *
           * @param newWord the replacement word
           * @param oldWord the word to replace
           * @param wordId  the id shared by the old and the new occurrence
           * @throws IllegalArgumentException if {@code newWord} contains a character outside
           *                                  {@code 'a'}..{@code 'z'}
           */
          public void updateWord(String newWord, String oldWord, int wordId) {
              updateWord(trieRoot, newWord, oldWord, wordId);
          }

          /**
           * Same as {@link #updateWord(String, String, int)}, operating below {@code root}.
           * The new word is validated before the old one is removed, so a rejected update
           * leaves the trie unchanged.
           *
           * @param root    the node to operate below (normally {@link #getRoot()})
           * @param newWord the replacement word
           * @param oldWord the word to replace
           * @param wordId  the id shared by the old and the new occurrence
           * @throws IllegalArgumentException if {@code newWord} contains a character outside
           *                                  {@code 'a'}..{@code 'z'}
           */
          public void updateWord(TrieNode root, String newWord, String oldWord, int wordId) {
              if (newWord != null) {
                  requireSupported(newWord);
              }
              if (deleteWord(root, oldWord, wordId) == DELETERES.SUCCESS) {
                  addWord(root, newWord, wordId);
              }
          }

          /**
           * Finds the ids of all stored words that start with {@code word}.
           *
           * @param word the prefix to look up
           * @return a new set with the matching ids; empty if the prefix is {@code null},
           *         empty, or not present
           */
          public HashSet<Integer> searchPrefixWord(String word) {
              return searchPrefixWord(trieRoot, word);
          }

          /**
           * Finds the ids of all words stored below {@code root} that start with {@code word}.
           *
           * @param root the node to search below (normally {@link #getRoot()})
           * @param word the prefix to look up
           * @return a new set with the matching ids; empty if the prefix is {@code null},
           *         empty, or not present. The set is a copy, so modifying it does not affect
           *         the trie.
           */
          public HashSet<Integer> searchPrefixWord(TrieNode root, String word) {
              if (null == word || word.length() == 0) {
                  return new HashSet<Integer>();
              }
              TrieNode node = findNode(root, word);
              if (node == null) {
                  return new HashSet<Integer>();
              }
              return new HashSet<Integer>(node.wordSet);
          }

          /**
           * Counts how many occurrences of exactly {@code word} are stored.
           *
           * @param word the word to count
           * @return the number of stored occurrences; 0 for a {@code null}, empty or unknown word
           */
          public int wordCount(String word) {
              return wordCount(trieRoot, word);
          }

          /**
           * Counts how many occurrences of exactly {@code word} are stored below {@code root}.
           *
           * @param root the node to search below (normally {@link #getRoot()})
           * @param word the word to count
           * @return the number of stored occurrences; 0 for a {@code null}, empty or unknown word
           */
          public int wordCount(TrieNode root, String word) {
              if (null == word || word.length() == 0) {
                  return 0;
              }
              TrieNode node = findNode(root, word);
              return node == null ? 0 : node.freq;
          }

          /** Follows {@code word} from {@code root}; returns the node reached, or null if the path breaks. */
          private static TrieNode findNode(TrieNode root, String word) {
              TrieNode node = root;
              for (int i = 0; i < word.length() && node != null; i++) {
                  char ch = word.charAt(i);
                  node = TrieNode.isSupportedChar(ch)
                          ? node.childNodes[TrieNode.getCharPosition(ch)]
                          : null;
              }
              return node;
          }

          /** Rejects words containing a character that has no child slot. */
          private static void requireSupported(String word) {
              for (int i = 0; i < word.length(); i++) {
                  if (!TrieNode.isSupportedChar(word.charAt(i))) {
                      throw new IllegalArgumentException("Unsupported character '" + word.charAt(i)
                              + "' at index " + i + " in word \"" + word + "\"");
                  }
              }
          }

          /** Tells whether {@code node} has at least one child. */
          private static boolean hasChildren(TrieNode node) {
              for (TrieNode child : node.childNodes) {
                  if (child != null) {
                      return true;
                  }
              }
              return false;
          }

          /** Adds one to the count stored for {@code id}. */
          private static void increment(HashMap<Integer, Integer> counts, int id) {
              Integer current = counts.get(id);
              counts.put(id, current == null ? 1 : current + 1);
          }

          /** Subtracts one from the count stored for {@code id}, dropping the entry at zero; returns the new count. */
          private static int decrement(HashMap<Integer, Integer> counts, int id) {
              Integer current = counts.get(id);
              if (current == null || current <= 1) {
                  counts.remove(id);
                  return 0;
              }
              counts.put(id, current - 1);
              return current - 1;
          }
      }

      /**
       * A node of the trie.<br>
       * Words are assumed to consist of the 26 lowercase English letters; the root node of the
       * trie stores no character.
       */
      class TrieNode {
          // Child nodes, one slot per supported letter.
          public TrieNode[] childNodes;
          // The character this node stands for.
          public char nodeChar;

          // Number of stored word occurrences that end at this node.
          public int freq;
          // Ids of the words whose path contains this node.
          public HashSet<Integer> wordSet;

          // For each id in wordSet: how many stored occurrences with that id pass through this node.
          final HashMap<Integer, Integer> idCounts;
          // For each id: how many stored occurrences with that id end exactly at this node.
          final HashMap<Integer, Integer> endIdCounts;

          // Creates an empty node with no children and no words.
          public TrieNode() {
              childNodes = new TrieNode[CHILD_NUM];
              freq = 0;
              wordSet = new HashSet<Integer>();
              idCounts = new HashMap<Integer, Integer>();
              endIdCounts = new HashMap<Integer, Integer>();
          }

          private static final int CHILD_NUM = 26;

          /**
           * Maps a character to its child slot, with {@code 'a'} at slot 0. The result is only
           * a valid index when {@link #isSupportedChar(char)} is true for the character.
           */
          public static int getCharPosition(char ch) {
              return (ch - 'a');
          }

          /** Tells whether {@code ch} has a child slot, i.e. lies in {@code 'a'}..{@code 'z'}. */
          public static boolean isSupportedChar(char ch) {
              int k = getCharPosition(ch);
              return k >= 0 && k < CHILD_NUM;
          }
      }
    View Code

    测试:

     1 package cn.edu.buaa.trie;
     2 
     3 /**
     4  * @author zsm
     5  * @date 2016年10月25日 下午3:12:02
     6  * @version 1.0
     7  * @parameter
     8  * @return
     9  */
    10 public class Main_Trie {
    11 
    12     public static void main(String[] args) {
    13         // TODO Auto-generated method stub
    14         Trie trie = new Trie();
    15         String wd1 = "ab";
    16         String wd2 = "ac";
    17         String wd3 = "acd";
    18 
    19         String wd4 = "add";
    20 
    21         trie.addWord(wd1, 1);
    22         trie.addWord(wd2, 2);
    23         trie.addWord(wd2, 3);
    24         trie.addWord(wd3, 4);
    25 
    26         // wd1,wd2,wd2,wd3
    27         System.out.println(trie.wordCount(wd2));// 2
    28         System.out.println(trie.wordCount(wd3));// 1
    29         System.out.println(trie.getTreeSize());// 4
    30         System.out.println();
    31 
    32         trie.deleteWord(wd3, 4);
    33         // wd1,wd2,wd2
    34         System.out.println(trie.wordCount(wd2));// 2
    35         System.out.println(trie.wordCount(wd3));// 0
    36         System.out.println(trie.getTreeSize());// 3
    37         System.out.println();
    38 
    39         trie.addWord(wd3, 4);
    40         // wd1,wd2,wd2,wd3
    41         System.out.println(trie.wordCount(wd2));// 2
    42         System.out.println(trie.wordCount(wd3));// 1
    43         System.out.println(trie.getTreeSize());// 4
    44         System.out.println();
    45 
    46         trie.deleteWord(wd2, 2);
    47         trie.deleteWord(wd2, 3);
    48         // wd1,wd3
    49         System.out.println(trie.wordCount(wd2));// 0
    50         System.out.println(trie.wordCount(wd3));// 1
    51         System.out.println(trie.getTreeSize());// 4
    52         System.out.println(trie.searchPrefixWord("a"));// [1,4]
    53         System.out.println();
    54 
    55         trie.updateWord(wd3, wd4, 4);
    56         // wd1,wd3
    57         System.out.println(trie.searchPrefixWord("a"));// [1,4]
    58         System.out.println(trie.wordCount(wd2));// 0
    59         System.out.println(trie.wordCount(wd3));// 1
    60         System.out.println(trie.wordCount(wd4));// 0
    61         System.out.println(trie.getTreeSize());// 4
    62         System.out.println();
    63 
    64         trie.updateWord(wd4, wd3, 4);
    65         // wd1,wd4
    66         System.out.println(trie.searchPrefixWord("a"));// [1,4]
    67         System.out.println(trie.wordCount(wd2));// 0
    68         System.out.println(trie.wordCount(wd3));// 0
    69         System.out.println(trie.wordCount(wd4));// 1
    70         System.out.println(trie.getTreeSize());// 4
    71         System.out.println();
    72     }
    73 }
    View Code

    3、参考资料

    http://www.cnblogs.com/huangxincheng/archive/2012/11/25/2788268.html

  • 相关阅读:
    工厂方法和抽象工厂
    waterMarkTextBox
    button hot key 热键
    wpf 双击行。。获得行信息
    update comboBox
    WPF标准控件模板查看程序(文件里面)
    Sp EF输出 临时表
    tree view
    Ubuntu 常用命令
    ESP8266 开发记录
  • 原文地址:https://www.cnblogs.com/z-sm/p/5997286.html
Copyright © 2011-2022 走看看