zoukankan      html  css  js  c++  java
  • 语法树

    个人感觉有点类似于有限状态机
    package com.smart.enumcompareto.test;

    import com.smart.enumcompareto.test.TernarySearchTrie.TSTNode;

    /**
     * Result of a match against the trie: the final matched node together with an
     * index into the token list.
     *
     * <p>As produced by {@code TernarySearchTrie.matchLong}, the index is the position
     * immediately after the last matched token, i.e. where scanning should resume.
     *
     * @author dell
     */
    public class MatchRet {

        /** Index associated with the match (see class comment). */
        private int index;

        /** Trie node at which the match ended; may be {@code null} if set so by a caller. */
        private TSTNode node;

        /**
         * Creates a match result.
         *
         * @param node  the node at which the match ended
         * @param index the index associated with the match
         */
        public MatchRet(TSTNode node, int index) {
            this.index = index;
            this.node = node;
        }

        /** @return the index associated with the match */
        public int getIndex() {
            return index;
        }

        /** @param index the index to associate with the match */
        public void setIndex(int index) {
            this.index = index;
        }

        /** @return the node at which the match ended */
        public TSTNode getNode() {
            return node;
        }

        /** @param node the node at which the match ended */
        public void setNode(TSTNode node) {
            this.node = node;
        }

        /**
         * Returns the payload of the matched node.
         *
         * <p>Never throws and never returns a null reference: a missing node or a node
         * without payload is rendered as the string {@code "null"}, which is what
         * printing a null string would have shown anyway.
         *
         * @return the node's {@code data}, or {@code "null"} when there is none
         */
        @Override
        public String toString() {
            if (node == null) {
                return "null";
            }
            return String.valueOf(node.data);
        }
    }
    package com.smart.enumcompareto.test;
    
    /**
     * Token categories used as the key alphabet of {@code TernarySearchTrie}.
     *
     * <p>The trie orders its nodes by calling {@code compareTo} on these constants,
     * which for enums follows declaration order. Reordering the constants therefore
     * changes how a trie is laid out.
     */
    public enum TokenType {
        Unknown,
        Basic,
        Suffix,
        Start,
        End
    }
    package com.smart.enumcompareto.test;
    
    import java.util.ArrayList;
    
    /**
     * Ternary search trie whose keys are sequences of {@link TokenType} values
     * instead of characters.
     *
     * <p>Each node holds one token plus three children: {@code loNode} for tokens
     * that compare lower, {@code hiNode} for tokens that compare higher, and
     * {@code eqNode} for the next position of the sequence. Tokens are ordered by
     * {@code compareTo}, i.e. by enum declaration order.
     *
     * @author dell
     */
    public class TernarySearchTrie {

        /** A single trie node. */
        public final class TSTNode {
            /** Payload of the node; {@code matchLong} treats a non-null value as "a sequence ends here". */
            public String data = null;
            /** Child for tokens comparing lower than {@link #splitchar}. */
            protected TSTNode loNode;
            /** Child for the next position once {@link #splitchar} has matched. */
            protected TSTNode eqNode;
            /** Child for tokens comparing higher than {@link #splitchar}. */
            protected TSTNode hiNode;
            /** Token stored at this node. */
            protected TokenType splitchar;

            /**
             * @param type the token stored at this node
             */
            public TSTNode(TokenType type) {
                this.splitchar = type;
            }
        }

        /** Root of the trie; {@code null} until the first sequence is added. */
        public TSTNode rootNode;

        /**
         * Inserts a token sequence, creating nodes as needed, and returns the node
         * reached by its last token. Callers mark the sequence as complete by
         * assigning {@code data} on the returned node.
         *
         * @param word the token sequence to insert; must contain at least one token
         * @return the node corresponding to the last token of {@code word}
         * @throws NullPointerException      if {@code word} is {@code null}
         * @throws IndexOutOfBoundsException if {@code word} is empty
         */
        public TSTNode add(ArrayList<TokenType> word) {
            if (null == word) {
                throw new NullPointerException("空指针异常");
            }

            int position = 0;
            if (null == rootNode) {
                rootNode = new TSTNode(word.get(0));
            }
            TSTNode node = rootNode;
            while (true) {
                int cmp = word.get(position).compareTo(node.splitchar);
                if (cmp == 0) {
                    // Token matched: advance within the sequence.
                    position++;
                    if (position == word.size()) {
                        return node;
                    }
                    if (null == node.eqNode) {
                        node.eqNode = new TSTNode(word.get(position));
                    }
                    node = node.eqNode;
                } else if (cmp < 0) {
                    if (null == node.loNode) {
                        node.loNode = new TSTNode(word.get(position));
                    }
                    node = node.loNode;
                } else {
                    if (null == node.hiNode) {
                        node.hiNode = new TSTNode(word.get(position));
                    }
                    node = node.hiNode;
                }
            }
        }

        /**
         * Looks up the node reached by exactly the given token sequence.
         *
         * <p>The node is returned whether or not its {@code data} is set, so a proper
         * prefix of an inserted sequence also yields a node.
         *
         * @param word the token sequence to look up
         * @return the node for the last token, or {@code null} if {@code word} is
         *         {@code null}, empty, or not present in the trie
         */
        protected TSTNode getNode(ArrayList<TokenType> word) {
            if (null == word) {
                return null;
            }
            int length = word.size();
            if (length == 0) {
                return null;
            }
            TSTNode node = rootNode; // current position in the trie
            int position = 0; // index of the token currently being compared
            TokenType token = word.get(position);
            while (true) {
                if (node == null) { // ran off the trie: not found
                    return null;
                }
                int cmp = token.compareTo(node.splitchar);
                if (cmp == 0) { // equal: move to the next token
                    position++;
                    if (position == length) { // consumed the whole sequence: found
                        return node;
                    }
                    token = word.get(position);
                    node = node.eqNode;
                } else if (cmp < 0) { // lower: go left
                    node = node.loNode;
                } else { // higher: go right
                    node = node.hiNode;
                }
            }
        }

        /**
         * Scans {@code tokens} from left to right and prints one line per step to
         * standard output: the longest match starting at the current position, or
         * {@code "null"} when nothing matches there. After a match, scanning resumes
         * at the index reported by the match; otherwise it advances by one token.
         *
         * <p>Does nothing when {@code tokens} is {@code null} or the trie is empty.
         *
         * @param tokens the token list to scan
         */
        public void tag(ArrayList<TokenType> tokens) {
            if (tokens == null || rootNode == null) {
                return;
            }
            int position = 0;
            while (position < tokens.size()) {
                MatchRet match = matchLong(tokens, position);
                if (null != match) {
                    System.out.println(match.toString());
                    position = match.getIndex();
                } else {
                    System.out.println("null");
                    position++;
                }
            }
        }

        /**
         * Finds the longest stored sequence (a node with non-null {@code data}) that
         * matches {@code tokens} starting at {@code offset}.
         *
         * @param tokens the token list to match against
         * @param offset index of the first token to match
         * @return the longest match, whose index is the position just past the last
         *         matched token; or {@code null} if {@code tokens} is {@code null},
         *         the trie is empty, {@code offset} is at or beyond the end of
         *         {@code tokens}, or nothing matches
         * @throws IndexOutOfBoundsException if {@code offset} is negative
         */
        public MatchRet matchLong(ArrayList<TokenType> tokens, int offset) {
            if (tokens == null || rootNode == null) {
                return null;
            }
            // No tokens left at or past the end: report "no match" rather than
            // letting tokens.get(...) throw.
            if (offset >= tokens.size()) {
                return null;
            }

            MatchRet longest = null;
            TSTNode node = rootNode;
            int index = offset;
            while (node != null) {
                int cmp = tokens.get(index).compareTo(node.splitchar);
                if (cmp == 0) {
                    index++;
                    if (node.data != null) {
                        // A stored sequence ends here; remember it and keep going
                        // in case a longer one also matches.
                        longest = new MatchRet(node, index);
                    }
                    if (index == tokens.size()) {
                        return longest;
                    }
                    node = node.eqNode;
                } else if (cmp < 0) {
                    node = node.loNode;
                } else {
                    node = node.hiNode;
                }
            }
            return longest;
        }

        /**
         * Runs the add/get demo. {@code testTag()} is an alternative demo that
         * exercises {@link #tag(ArrayList)}.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            testAddGet();
        }

        /** Builds a token list from the given tokens, in order. */
        private static ArrayList<TokenType> sequenceOf(TokenType... types) {
            ArrayList<TokenType> sequence = new ArrayList<TokenType>(types.length);
            for (TokenType type : types) {
                sequence.add(type);
            }
            return sequence;
        }

        /** Demo: inserts one sequence, looks it up again and prints its payload. */
        private static void testAddGet() {
            TernarySearchTrie tree = new TernarySearchTrie();
            ArrayList<TokenType> list = sequenceOf(
                    TokenType.Start, TokenType.Suffix, TokenType.Basic, TokenType.End);

            TSTNode node = tree.add(list);
            node.data = "我是start-suffix-basic-end组合";

            TSTNode ret = tree.getNode(list);
            if (ret == null) {
                System.out.println("未匹配到");
            } else {
                System.out.println(ret.data);
            }
        }

        /** Demo: inserts two sequences and tags a longer token list against them. */
        private static void testTag() {
            TernarySearchTrie tree = new TernarySearchTrie();

            TSTNode node = tree.add(sequenceOf(TokenType.Basic, TokenType.Suffix));
            node.data = "我是basic-suffix组合";

            TSTNode node_1 = tree.add(sequenceOf(TokenType.Unknown, TokenType.Suffix));
            node_1.data = "我是unknown-suffix组合";

            ArrayList<TokenType> list = sequenceOf(
                    TokenType.Start,
                    TokenType.Basic,
                    TokenType.Suffix,
                    TokenType.Unknown,
                    TokenType.Suffix,
                    TokenType.End);

            tree.tag(list);
        }

    }
  • 相关阅读:
    常见搜索召回方式
    阿里Tree-based Deep Match(TDM) 学习笔记
    阻塞、非阻塞、同步、异步的理解
    最佳实践:深度学习用于自然语言处理(Deep Learning for NLP Best Practices)
    ELK日志分析工具
    mysql性能测试--sysbench实践
    mysql性能测试-tpcc
    mysql基础测试
    压力测试sysbench
    压力测试工具MySQL mysqlslap
  • 原文地址:https://www.cnblogs.com/i80386/p/2730435.html
Copyright © 2011-2022 走看看