zoukankan html css js c++ java

语法树

个人感觉这里的语法树（以 TokenType 序列为键的三叉搜索树）有点类似于有限状态机。
package com.smart.enumcompareto.test;

import com.smart.enumcompareto.test.TernarySearchTrie.TSTNode;

/**
 * Result of matching a token sequence against the syntax tree: records the
 * final matched node together with the index associated with that node.
 *
 * <p>When produced by {@code TernarySearchTrie.matchLong}, {@code index} is
 * the position just past the last matched token, i.e. where the next match
 * attempt should start.
 *
 * @author dell
 */
public class MatchRet {
    /** Index recorded for the match. */
    private int index;
    /** Last trie node that matched. */
    private TSTNode node;

    /**
     * Creates a match result.
     *
     * @param node  the final matched node
     * @param index the index recorded for that node
     */
    public MatchRet(TSTNode node, int index) {
        this.index = index;
        this.node = node;
    }

    /** @return the index recorded for the match */
    public int getIndex() {
        return index;
    }

    /** @param index the index to record for the match */
    public void setIndex(int index) {
        this.index = index;
    }

    /** @return the final matched node, possibly {@code null} if it was set so */
    public TSTNode getNode() {
        return node;
    }

    /** @param node the final matched node */
    public void setNode(TSTNode node) {
        this.node = node;
    }

    /**
     * Returns the payload of the matched node.
     *
     * <p>Never throws and never returns a null reference: a missing node or a
     * node without payload is rendered as the string {@code "null"}.
     *
     * @return the node's {@code data}, or {@code "null"} when unavailable
     */
    @Override
    public String toString() {
        if (node == null) {
            return "null";
        }
        // String.valueOf maps a null payload to "null" instead of handing a
        // null reference back to the caller.
        return String.valueOf(node.data);
    }

}

package com.smart.enumcompareto.test;

/**
 * Token categories used as the elements of a token sequence.
 *
 * <p>The declaration order is significant: {@code TernarySearchTrie} branches
 * on {@code compareTo}, which for enums follows declaration order, so
 * reordering these constants changes how the tree is laid out.
 */
public enum TokenType {
    Unknown, Basic, Suffix, Start, End;
}

package com.smart.enumcompareto.test;

import java.util.ArrayList;

/**
 * Test of a "syntax tree": a ternary search trie whose keys are sequences of
 * {@link TokenType} values rather than characters.
 *
 * <p>Each node holds one token type and three children: {@code loNode} for
 * smaller token types, {@code hiNode} for larger ones, and {@code eqNode} for
 * the next position of the sequence. Ordering is the natural enum ordering
 * ({@code compareTo}).
 *
 * @author dell
 */
public class TernarySearchTrie {

    /** A single trie node. */
    public final class TSTNode {
        /**
         * Payload of the node. {@link #matchLong} treats a node whose
         * {@code data} is non-null as the end of a registered sequence.
         */
        public String data = null;
        /** Child followed when the compared token is smaller than {@code splitchar}. */
        protected TSTNode loNode;
        /** Child followed when the compared token equals {@code splitchar}. */
        protected TSTNode eqNode;
        /** Child followed when the compared token is greater than {@code splitchar}. */
        protected TSTNode hiNode;
        /** Token type this node is compared against. */
        protected TokenType splitchar;

        /**
         * @param type the token type this node represents
         */
        public TSTNode(TokenType type) {
            this.splitchar = type;
        }
    }

    /** Root of the trie; {@code null} until the first sequence is added. */
    public TSTNode rootNode;

    /**
     * Inserts a token sequence, creating any missing nodes, and returns the
     * node at which the sequence ends. The caller is expected to attach the
     * payload by assigning {@code data} on the returned node.
     *
     * @param word the token sequence to insert; must be non-null and non-empty
     * @return the node corresponding to the last token of {@code word}
     * @throws NullPointerException      if {@code word} is null
     * @throws IndexOutOfBoundsException if {@code word} is empty
     */
    public TSTNode add(ArrayList<TokenType> word) {
        if (null == word) {
            throw new NullPointerException("空指针异常");
        }

        int charIndex = 0;
        if (null == rootNode) {
            rootNode = new TSTNode(word.get(0));
        }
        TSTNode currentNode = rootNode;
        while (true) {
            int charComp = word.get(charIndex).compareTo(currentNode.splitchar);
            if (charComp == 0) {
                // Token matched: advance in the sequence and descend.
                charIndex++;
                if (charIndex == word.size()) {
                    return currentNode;
                }
                if (null == currentNode.eqNode) {
                    currentNode.eqNode = new TSTNode(word.get(charIndex));
                }
                currentNode = currentNode.eqNode;
            } else if (charComp < 0) {
                // Smaller token: go left, creating the branch if needed.
                if (null == currentNode.loNode) {
                    currentNode.loNode = new TSTNode(word.get(charIndex));
                }
                currentNode = currentNode.loNode;
            } else {
                // Greater token: go right, creating the branch if needed.
                if (null == currentNode.hiNode) {
                    currentNode.hiNode = new TSTNode(word.get(charIndex));
                }
                currentNode = currentNode.hiNode;
            }
        }
    }

    /**
     * Looks up the node reached by following exactly the given sequence.
     *
     * <p>The node is returned whenever the path exists, whether or not its
     * {@code data} has been set.
     *
     * @param word the token sequence to look up
     * @return the node for the last token of {@code word}, or {@code null}
     *         if {@code word} is null or empty or the path does not exist
     */
    protected TSTNode getNode(ArrayList<TokenType> word) {
        if (null == word) {
            return null;
        }
        int len = word.size();
        if (len == 0) {
            return null;
        }
        TSTNode currentNode = rootNode; // current node while matching
        int charIndex = 0; // position in the key of the token being compared
        TokenType cmpChar = word.get(charIndex);
        int charComp;
        while (true) {
            if (currentNode == null) { // not found
                return null;
            }
            charComp = cmpChar.compareTo(currentNode.splitchar);
            if (charComp == 0) { // equal: descend
                charIndex++;
                if (charIndex == len) { // found
                    return currentNode;
                } else {
                    cmpChar = word.get(charIndex); // advance in the sequence
                }
                currentNode = currentNode.eqNode;
            } else if (charComp < 0) { // smaller: go left
                currentNode = currentNode.loNode;
            } else { // greater: go right
                currentNode = currentNode.hiNode;
            }
        }
    }

    /**
     * Scans the token list from left to right and prints, for each position,
     * either the payload of the longest registered sequence starting there
     * (then skips past it) or {@code "null"} (then advances by one token).
     *
     * <p>Does nothing when {@code tokens} is null or the trie is empty.
     *
     * @param tokens the token list to tag
     */
    public void tag(ArrayList<TokenType> tokens) {
        if (tokens == null || rootNode == null) {
            return;
        }
        for (int i = 0; i < tokens.size();) {
            MatchRet ret = matchLong(tokens, i);
            if (null != ret) {
                System.out.println(ret.toString());
                // A match always consumes at least one token, so this moves forward.
                i = ret.getIndex();
            } else {
                System.out.println("null");
                i++;
            }
        }
    }

    /**
     * Finds the longest registered sequence that starts at {@code offset}.
     *
     * <p>A node counts as the end of a registered sequence when its
     * {@code data} is non-null. The returned result carries that node and the
     * index just past the last matched token.
     *
     * @param tokens the token list to match against
     * @param offset position in {@code tokens} at which matching starts
     * @return the longest match, or {@code null} if there is none, if
     *         {@code tokens} is null, if the trie is empty, or if
     *         {@code offset} is at or beyond the end of {@code tokens}
     * @throws IndexOutOfBoundsException if {@code offset} is negative
     */
    public MatchRet matchLong(ArrayList<TokenType> tokens, int offset) {
        if (tokens == null || rootNode == null) {
            return null;
        }
        // Nothing left to match (also covers an empty token list); without
        // this guard tokens.get(index) below would throw.
        if (offset >= tokens.size()) {
            return null;
        }

        MatchRet ret = null;
        TSTNode currentNode = rootNode;
        int index = offset;
        while (currentNode != null) {
            int charComp = tokens.get(index).compareTo(currentNode.splitchar);
            if (charComp == 0) {
                index++;
                // Remember the most recent (hence longest) complete sequence.
                if (currentNode.data != null) {
                    ret = new MatchRet(currentNode, index);
                }
                if (index == tokens.size()) {
                    return ret;
                }
                currentNode = currentNode.eqNode;
            } else if (charComp < 0) {
                currentNode = currentNode.loNode;
            } else {
                currentNode = currentNode.hiNode;
            }
        }
        return ret;
    }

    /**
     * Entry point: runs the add/get demo. The tagging demo is disabled.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        //testTag();
        testAddGet();
    }

    /** Adds one four-token sequence, looks it up again and prints its payload. */
    private static void testAddGet() {
        TernarySearchTrie tree = new TernarySearchTrie();
        ArrayList<TokenType> list = new ArrayList<TokenType>();
        list.add(TokenType.Start);
        list.add(TokenType.Suffix);
        list.add(TokenType.Basic);
        list.add(TokenType.End);

        TSTNode node = tree.add(list);
        node.data = "我是start-suffix-basic-end组合";

        TSTNode ret = tree.getNode(list);
        if (ret == null) {
            System.out.println("未匹配到");
        } else {
            System.out.println(ret.data);
        }
    }

    /** Registers two two-token sequences and tags a six-token input with them. */
    private static void testTag() {
        TernarySearchTrie tree = new TernarySearchTrie();
        ArrayList<TokenType> list = new ArrayList<TokenType>();

        list.add(TokenType.Basic);
        list.add(TokenType.Suffix);

        TSTNode node = tree.add(list);
        node.data = "我是basic-suffix组合";

        list = new ArrayList<TokenType>();
        list.add(TokenType.Unknown);
        list.add(TokenType.Suffix);

        TSTNode node_1 = tree.add(list);
        node_1.data = "我是unknown-suffix组合";

        list = new ArrayList<TokenType>();
        list.add(TokenType.Start);
        list.add(TokenType.Basic);
        list.add(TokenType.Suffix);
        list.add(TokenType.Unknown);
        list.add(TokenType.Suffix);
        list.add(TokenType.End);

        tree.tag(list);
    }

}

查看全文

相关阅读:
常见搜索召回方式
 阿里Tree-based Deep Match(TDM) 学习笔记
 阻塞、非阻塞、同步、异步的理解
 最佳实践：深度学习用于自然语言处理(Deep Learning for NLP Best Practices)
ELK日志分析工具
 mysql性能测试--sysbench实践
 mysql性能测试-tpcc
mysql基础测试
 压力测试sysbench
压力测试工具MySQL mysqlslap

原文地址：https://www.cnblogs.com/i80386/p/2730435.html