zoukankan      html  css  js  c++  java
  • DoubleArrayTrie

    /**
     * DoubleArrayTrie: Java implementation of Darts (Double-ARray Trie System)
     * 
     * <p>
     * Copyright(C) 2001-2007 Taku Kudo &lt;taku@chasen.org&gt;<br />
     * Copyright(C) 2009 MURAWAKI Yugo &lt;murawaki@nlp.kuee.kyoto-u.ac.jp&gt;
     * Copyright(C) 2012 KOMIYA Atsushi &lt;komiya.atsushi@gmail.com&gt;
     * </p>
     * 
     * <p>
     * The contents of this file may be used under the terms of either of the GNU
     * Lesser General Public License Version 2.1 or later (the "LGPL"), or the BSD
     * License (the "BSD").
     * </p>
     */
    import java.io.BufferedInputStream;
    import java.io.BufferedOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.util.ArrayList;
    import java.util.List;
    
    public class DoubleArrayTrie {
        private final static int BUF_SIZE  = 16384;
        private final static int UNIT_SIZE = 8;    // size of int + int
    
        public static class Node {
            int code;
            int depth;
            int left;
            int right;
        };
    
        private int          check[];
        private int          base[];
    
        private boolean      used[];
        private int          size;
        private int          allocSize;
        private List<String> key;
        private int          keySize;
        private int          length[];
        private int          value[];
        private int          progress;
        private int          nextCheckPos;
        private Node[]       nodes;
        // boolean no_delete_;
        int                  error_;
    
        // int (*progressfunc_) (size_t, size_t);
    
        // inline _resize expanded
        private int resize(int newSize) {
            int[] base2 = new int[newSize];
            int[] check2 = new int[newSize];
            boolean used2[] = new boolean[newSize];
            Node[] nodes2 = new Node[newSize];
            if (allocSize > 0) {
                System.arraycopy(base, 0, base2, 0, allocSize);
                System.arraycopy(check, 0, check2, 0, allocSize);
                System.arraycopy(used, 0, used2, 0, allocSize);
                System.arraycopy(nodes, 0, nodes2, 0, allocSize);
            }
    
            base = base2;
            check = check2;
            used = used2;
            nodes = nodes2;
    
            return allocSize = newSize;
        }
    
        private int fetch(Node parent, List<Node> siblings) {
            if (error_ < 0)
                return 0;
    
            int prev = 0;
    
            for (int i = parent.left; i < parent.right; i++) {
                if ((length != null ? length[i] : key.get(i).length()) < parent.depth)
                    continue;
    
                String tmp = key.get(i);
    
                int cur = 0;
                if ((length != null ? length[i] : tmp.length()) != parent.depth)
                    cur = (int) tmp.charAt(parent.depth) + 1;
    
                if (prev > cur) {
                    error_ = -3;
                    return 0;
                }
    
                if (cur != prev || siblings.size() == 0) {
                    Node tmp_node = new Node();
                    tmp_node.depth = parent.depth + 1;
                    tmp_node.code = cur;
                    tmp_node.left = i;
                    if (siblings.size() != 0)
                        siblings.get(siblings.size() - 1).right = i;
    
                    siblings.add(tmp_node);
                }
    
                prev = cur;
            }
    
            if (siblings.size() != 0)
                siblings.get(siblings.size() - 1).right = parent.right;
    
            return siblings.size();
        }
    
        private int insert(List<Node> siblings) {
            if (error_ < 0)
                return 0;
    
            int begin = 0;
            int pos = ((siblings.get(0).code + 1 > nextCheckPos) ? siblings.get(0).code + 1
                : nextCheckPos) - 1;
            int nonzero_num = 0;
            int first = 0;
    
            if (allocSize <= pos)
                resize(pos + 1);
    
            outer: while (true) {
                pos++;
    
                if (allocSize <= pos)
                    resize(pos + 1);
    
                if (check[pos] != 0) {
                    nonzero_num++;
                    continue;
                } else if (first == 0) {
                    nextCheckPos = pos;
                    first = 1;
                }
    
                begin = pos - siblings.get(0).code;
                if (allocSize <= (begin + siblings.get(siblings.size() - 1).code)) {
                    // progress can be zero
                    double l = (1.05 > 1.0 * keySize / (progress + 1)) ? 1.05
                        : 1.0 * keySize / (progress + 1);
                    resize((int) (allocSize * l));
                }
    
                if (used[begin])
                    continue;
    
                for (int i = 1; i < siblings.size(); i++)
                    if (check[begin + siblings.get(i).code] != 0)
                        continue outer;
    
                break;
            }
    
            // -- Simple heuristics --
            // if the percentage of non-empty contents in check between the
            // index
            // 'next_check_pos' and 'check' is greater than some constant value
            // (e.g. 0.9),
            // new 'next_check_pos' index is written by 'check'.
            if (1.0 * nonzero_num / (pos - nextCheckPos + 1) >= 0.95)
                nextCheckPos = pos;
    
            used[begin] = true;
            size = (size > begin + siblings.get(siblings.size() - 1).code + 1) ? size
                : begin + siblings.get(siblings.size() - 1).code + 1;
    
            for (int i = 0; i < siblings.size(); i++)
                check[begin + siblings.get(i).code] = begin;
    
            for (int i = 0; i < siblings.size(); i++) {
                List<Node> new_siblings = new ArrayList<Node>();
    
                if (fetch(siblings.get(i), new_siblings) == 0) {
                    base[begin + siblings.get(i).code] = (value != null)
                        ? (-value[siblings.get(i).left] - 1)
                        : (-siblings.get(i).left - 1);
    
                    if (value != null && (-value[siblings.get(i).left] - 1) >= 0) {
                        error_ = -2;
                        return 0;
                    }
    
                    progress++;
                    // if (progress_func_) (*progress_func_) (progress,
                    // keySize);
                } else {
                    int h = insert(new_siblings);
                    base[begin + siblings.get(i).code] = h;
                    nodes[begin + siblings.get(i).code] = siblings.get(i);
                }
            }
            return begin;
        }
    
        public DoubleArrayTrie() {
            check = null;
            base = null;
            used = null;
            size = 0;
            allocSize = 0;
            // no_delete_ = false;
            error_ = 0;
        }
    
        // no deconstructor
    
        // set_result omitted
        // the search methods returns (the list of) the value(s) instead
        // of (the list of) the pair(s) of value(s) and length(s)
    
        // set_array omitted
        // array omitted
    
        void clear() {
            // if (! no_delete_)
            check = null;
            base = null;
            used = null;
            allocSize = 0;
            size = 0;
            // no_delete_ = false;
        }
    
        public int getUnitSize() {
            return UNIT_SIZE;
        }
    
        public int getSize() {
            return size;
        }
    
        public int getTotalSize() {
            return size * UNIT_SIZE;
        }
    
        public int getNonzeroSize() {
            int result = 0;
            for (int i = 0; i < size; i++)
                if (check[i] != 0)
                    result++;
            return result;
        }
    
        public int build(List<String> key) {
            int _keySize = key.size();
            int _length[] = new int[_keySize];
            for (int i = 0; i < _keySize; i++) {
                _length[i] = key.get(i).length();
            }
            return build(key, _length, null, _keySize);
        }
    
        public int build(List<String> _key, int _length[], int _value[], int _keySize) {
            if (_keySize > _key.size() || _key == null)
                return 0;
    
            // progress_func_ = progress_func;
            key = _key;
            length = _length;
            keySize = _keySize;
            value = _value;
            progress = 0;
    
            resize(65536 * 32);
    
            base[0] = 1;
            nextCheckPos = 0;
    
            Node root_node = new Node();
            root_node.left = 0;
            root_node.right = keySize;
            root_node.depth = 0;
    
            List<Node> siblings = new ArrayList<Node>();
            fetch(root_node, siblings);
            insert(siblings);
    
            // size += (1 << 8 * 2) + 1; // ???
            // if (size >= allocSize) resize (size);
    
            used = null;
            key = null;
    
            return error_;
        }
    
        public void open(String fileName) throws IOException {
            open(fileName, true);
        }
    
        public void open(InputStream inputStream, Boolean saveLen) throws IOException {
            DataInputStream is = null;
            try {
                is = new DataInputStream(new BufferedInputStream(inputStream, BUF_SIZE));
                size = is.readInt();
                check = new int[size];
                base = new int[size];
                for (int i = 0; i < size; i++) {
                    base[i] = is.readInt();
                    check[i] = is.readInt();
                }
                if (saveLen) {
                    keySize = is.readInt();
                    length = new int[keySize];
                    for (int i = 0; i < keySize; i++) {
                        length[i] = is.readInt();
                    }
                }
            } finally {
                if (is != null)
                    is.close();
            }
        }
    
        public void open(String fileName, Boolean saveLen) throws IOException {
            File file = new File(fileName);
            //size = (int) file.length() / UNIT_SIZE;
    
            DataInputStream is = null;
            try {
                is = new DataInputStream(new BufferedInputStream(new FileInputStream(file), BUF_SIZE));
                size = is.readInt();
                check = new int[size];
                base = new int[size];
                for (int i = 0; i < size; i++) {
                    base[i] = is.readInt();
                    check[i] = is.readInt();
                }
                if (saveLen) {
                    keySize = is.readInt();
                    length = new int[keySize];
                    for (int i = 0; i < keySize; i++) {
                        length[i] = is.readInt();
                    }
                }
            } finally {
                if (is != null)
                    is.close();
            }
        }
    
        public void save(String fileName) throws IOException {
            save(fileName, true);
        }
    
        public void save(String fileName, Boolean saveLen) throws IOException {
            DataOutputStream out = null;
            try {
                out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(fileName)));
                out.writeInt(size);
                for (int i = 0; i < size; i++) {
                    out.writeInt(base[i]);
                    out.writeInt(check[i]);
                }
                if (saveLen) {
                    out.writeInt(keySize);
                    for (int i = 0; i < keySize; i++) {
                        out.writeInt(length[i]);
                    }
                }
                out.close();
            } finally {
                if (out != null)
                    out.close();
            }
        }
    
        /**
         * 获取命中词的长度
         * @param index
         * @return
         */
        public int getWordLen(int index) {
            return length[index];
        }
    
        public int exactMatchSearch(String key) {
            return exactMatchSearch(key, 0, 0, 0);
        }
    
        public int exactMatchSearch(String key, int pos, int len, int nodePos) {
            if (len <= 0)
                len = key.length();
            if (nodePos <= 0)
                nodePos = 0;
    
            int result = -1;
    
            char[] keyChars = key.toCharArray();
    
            int b = base[nodePos];
            int p;
    
            for (int i = pos; i < len; i++) {
                p = b + (int) (keyChars[i]) + 1;
                if (b == check[p])
                    b = base[p];
                else
                    return result;
            }
    
            p = b;
            int n = base[p];
            if (b == check[p] && n < 0) {
                result = -n - 1;
            }
            return result;
        }
    
        public List<Integer> commonPrefixSearch(String key) {
            return commonPrefixSearch(key, 0, 0, 0);
        }
    
        public List<Integer> commonPrefixSearch(String key, int pos, int len, int nodePos) {
            if (len <= 0)
                len = key.length();
            if (nodePos <= 0)
                nodePos = 0;
    
            List<Integer> result = new ArrayList<Integer>();
    
            char[] keyChars = key.toCharArray();
    
            int b = base[nodePos];
            int n;
            int p;
    
            for (int i = pos; i < len; i++) {
                p = b;
                n = base[p];
    
                if (b == check[p] && n < 0) {
                    result.add(-n - 1);
                }
    
                p = b + (int) (keyChars[i]) + 1;
                if (p < size && b == check[p])
                    b = base[p];
                else
                    return result;
            }
    
            p = b;
            n = base[p];
    
            if (b == check[p] && n < 0) {
                result.add(-n - 1);
            }
    
            return result;
        }
    
        // 部分匹配,例如字典为成都市,待匹配词为成都,则也算匹配上,匹配失败返回-1,匹配到词典结束返回0
        public Integer partMatchSearch(String key, int pos, int len, int nodePos) {
            if (len <= 0)
                len = key.length();
            if (nodePos <= 0)
                nodePos = 0;
    
            int result = -1;
    
            char[] keyChars = key.toCharArray();
    
            int b = base[nodePos];
            int p = -1;
    
            for (int i = pos; i < len; i++) {
                p = b + (int) (keyChars[i]) + 1;
                if (b == check[p])
                    b = base[p];
                else
                    return result;
            }
            return p;
        }
    
        public Node getNode(Integer index) {
            if (index < 0) {
                return null;
            }
            return nodes[index];
        }
    
        // 判断是否是部分匹配,决定下一个词的匹配逻辑
        public boolean isPartMatched(String key, int nodePos) {
            int b = base[nodePos];
            int p = b;
            int n = base[p];
            if (b == check[p] && n < 0) {
                return false;
            } else {
                return true;
            }
        }
    
        // debug
        //public void dump() {
        //    for (int i = 0; i < size; i++) {
        //        System.err.println("i: " + i + " [" + base[i] + ", " + check[i] + "]");
        //    }
        //}
    
        @Override
        public boolean equals(Object obj) {
            if (obj instanceof DoubleArrayTrie) {
                DoubleArrayTrie datObj = (DoubleArrayTrie) obj;
                if (size != datObj.size) {
                    return false;
                } else if (keySize != datObj.keySize) {
                    return false;
                }
                for (int i = 0; i < size; i++) {
                    if (base[i] != datObj.base[i] || check[i] != datObj.check[i]) {
                        return false;
                    }
                }
                for (int i = 0; i < keySize; i++) {
                    if (length[i] != datObj.length[i]) {
                        return false;
                    }
                }
                return true;
            } else {
                return false;
            }
        }
    
        @Override
        public int hashCode() {
            return super.hashCode();
        }
    }
  • 相关阅读:
    《Real Time 3D Terrain Engines Using C++ And DirectX9》(实时地形引擎)随书源码
    (转)ogreUML类图
    (转)导弹跟踪算法
    转:正则表达式语法
    读取数据
    Python lambda用法及其与def的区别
    转Vb6.0安装失败解决办法,完全卸载VB(提高班里win7没装上VB的可以看看,我实验成功了)
    vb imagelist 作用
    二叉树的实现
    转:Python语言编程学习资料(电子书+视频教程)下载汇总:
  • 原文地址:https://www.cnblogs.com/iscys/p/12077167.html
Copyright © 2011-2022 走看看