zoukankan      html  css  js  c++  java
  • DFA敏感词过滤

    import java.io.UnsupportedEncodingException;
    import java.nio.ByteBuffer;
    import java.nio.charset.Charset;
    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Locale;
    import java.util.Map;
    
    import org.apache.commons.lang3.StringUtils;
    import org.json.JSONException;
    import org.json.JSONObject;
      
    /**
     * Sensitive-word filter backed by a byte-level trie (DFA).
     *
     * <p>Keywords are trimmed, lower-cased and encoded with the configured
     * charset; every encoded byte is one edge in a tree of {@code TreeNode}s.
     * Searching walks the encoded text through that tree and overwrites every
     * byte of a matched keyword with {@code '*'}.
     *
     * <p>All state is static. A keyword tree is built completely before it is
     * published through a volatile field and is never modified afterwards, so
     * searches read a snapshot of the current tree without taking a lock.
     */
    public class DFA {
        /** Value of the {@code islegal} key when no keyword was found. */
        private static final String LEGAL = "1";

        /** Value of the {@code islegal} key when at least one keyword was masked. */
        private static final String ILLEGAL = "0";

        /** Byte written over every byte of a matched keyword ('*', decimal 42). */
        private static final byte MASK = (byte) '*';

        /** Root of the published keyword tree; empty until the first build. */
        private static volatile TreeNode rootNode = new TreeNode();

        /** Name of the charset used to encode keywords and searched text. */
        private static volatile String charset = "utf-8";

        /**
         * Builds a new keyword tree and publishes it, replacing the previous one.
         *
         * <p>Each call starts from an empty tree, so keywords from earlier calls
         * are dropped. {@code null} entries and entries that are empty after
         * trimming are ignored.
         *
         * @param keywordList keywords to filter; must not be {@code null}
         * @throws UnsupportedEncodingException if the configured charset is not
         *         available
         */
        public static void createKeywordTree(List<String> keywordList) throws UnsupportedEncodingException {
            Charset cs = resolveCharset();
            // Build into a private root so that running searches never observe
            // a half-built tree.
            TreeNode newRoot = new TreeNode();
            for (String keyword : keywordList) {
                if (keyword == null) {
                    continue;
                }
                String trimmed = keyword.trim();
                // Same normalisation as the searched text, so both sides agree.
                byte[] bytes = toAlignedLowerCaseBytes(trimmed, trimmed.getBytes(cs), cs);
                TreeNode node = newRoot;
                for (int i = 0; i < bytes.length; i++) {
                    int index = bytes[i] & 0xFF;
                    TreeNode child = node.getSubNode(index);
                    if (child == null) {
                        child = new TreeNode();
                        node.setSubNode(index, child);
                    }
                    node = child;
                }
                if (bytes.length > 0) {
                    node.setKeywordEnd(true);
                }
            }
            rootNode = newRoot;
        }

        /**
         * Searches {@code text} for keywords, ignoring case, and masks them.
         *
         * <p>If no keyword tree has been built yet the tree is empty and the text
         * is reported as legal and returned unchanged.
         *
         * @param text text to check; {@code null} or empty yields a legal, empty
         *        result
         * @return JSON object with {@code islegal} ("1" when nothing matched,
         *         "0" otherwise) and {@code deststr} (the text with every byte of
         *         each matched keyword replaced by {@code '*'})
         * @throws UnsupportedEncodingException if the configured charset is not
         *         available
         */
        public static JSONObject searchKeyword(String text) throws UnsupportedEncodingException {
            if (text == null || text.isEmpty()) {
                return buildResult(LEGAL, "");
            }
            Charset cs = resolveCharset();
            byte[] original = text.getBytes(cs);
            byte[] lowered = toAlignedLowerCaseBytes(text, original, cs);
            return maskKeywords(original, lowered, cs);
        }

        /**
         * Searches already-encoded text for keywords and masks them in place.
         *
         * <p>Both arrays are modified: matched bytes are overwritten with
         * {@code '*'} in {@code bytes} and at the same offsets in
         * {@code oldBytes}.
         *
         * @param oldBytes the text in its original case, encoded with the
         *        configured charset; must have the same length as {@code bytes}
         * @param bytes the lower-cased text used for matching; {@code null} or
         *        empty yields a legal, empty result
         * @return JSON object with {@code islegal} and {@code deststr}, where
         *         {@code deststr} is {@code oldBytes} decoded after masking
         * @throws IllegalArgumentException if {@code bytes} is non-empty and
         *         {@code oldBytes} is {@code null} or of a different length
         * @throws IllegalStateException if the configured charset is not available
         */
        public static JSONObject searchKeyword(byte[] oldBytes, byte[] bytes) {
            if (bytes == null || bytes.length == 0) {
                return buildResult(LEGAL, "");
            }
            Charset cs;
            try {
                cs = resolveCharset();
            } catch (UnsupportedEncodingException e) {
                throw new IllegalStateException("Unsupported charset: " + e.getMessage(), e);
            }
            return maskKeywords(oldBytes, bytes, cs);
        }

        /**
         * Sets the charset used to encode keywords and searched text.
         *
         * <p>The setting is static and shared by all callers. A tree built
         * earlier keeps the bytes of the charset it was built with, so rebuild
         * the tree after changing the charset.
         *
         * @param charset charset name, for example {@code "utf-8"}
         */
        public void setCharset(String charset) {
            DFA.charset = charset;
        }

        /** Small demo: masks two keywords in a mixed-script sentence. */
        public static void main(String[] args){
            try{
                List<String> keys = new ArrayList<String>();
                keys.add("TMD");
                keys.add("변형");
                String text ="你真TMD是个변형啊!";
                DFA.createKeywordTree(keys);
                JSONObject a = DFA.searchKeyword(text);
                System.out.println(a);
            }
            catch(Exception ex){
                ex.printStackTrace();
            }
        }

        /** Walks {@code bytes} through the tree, masks matches in both arrays and builds the result. */
        private static JSONObject maskKeywords(byte[] oldBytes, byte[] bytes, Charset cs) {
            if (oldBytes == null || oldBytes.length != bytes.length) {
                throw new IllegalArgumentException(
                        "oldBytes and bytes must have the same length so that match offsets line up");
            }
            TreeNode root = rootNode; // snapshot: a concurrent rebuild cannot switch trees mid-scan
            TreeNode node = root;
            boolean legal = true;
            int rollback = 0; // bytes consumed since the current start (or since the last match)
            int position = 0;
            while (position < bytes.length) {
                node = node.getSubNode(bytes[position] & 0xFF);
                if (node == null) {
                    // Dead end: rewind so that the increment below resumes one
                    // byte after the failed start, back at the root.
                    position -= rollback;
                    rollback = 0;
                    node = root;
                } else if (node.isKeywordEnd()) {
                    for (int i = 0; i <= rollback; i++) {
                        bytes[position - i] = MASK;
                        oldBytes[position - i] = MASK;
                    }
                    // Stay on this node so a longer keyword sharing the prefix
                    // can still match; a later dead end rewinds only to here.
                    rollback = 1;
                    legal = false;
                } else {
                    rollback++;
                }
                position++;
            }
            return buildResult(legal ? LEGAL : ILLEGAL, new String(oldBytes, cs));
        }

        /**
         * Encodes the lower-cased form of {@code text} so that every code point
         * occupies exactly the bytes it occupies in {@code original}.
         *
         * <p>A code point whose lower-case form encodes to a different number of
         * bytes is kept in its original case. If the charset cannot be encoded
         * one code point at a time, a copy of {@code original} is returned, which
         * makes matching case-sensitive.
         */
        private static byte[] toAlignedLowerCaseBytes(String text, byte[] original, Charset cs) {
            // Locale.ROOT keeps the mapping independent of the JVM default locale.
            String lowered = text.toLowerCase(Locale.ROOT);
            if (lowered.equals(text)) {
                return original.clone();
            }
            // Equal char counts mean the whole-string mapping is one-to-one, so
            // its context-sensitive results can be reused position by position.
            boolean sameShape = lowered.length() == text.length();
            byte[] aligned = new byte[original.length];
            int offset = 0;
            int i = 0;
            while (i < text.length()) {
                int next = i + Character.charCount(text.codePointAt(i));
                String piece = text.substring(i, next);
                String loweredPiece = sameShape ? lowered.substring(i, next) : piece.toLowerCase(Locale.ROOT);
                byte[] pieceBytes = piece.getBytes(cs);
                byte[] loweredBytes = loweredPiece.getBytes(cs);
                byte[] chosen = loweredBytes.length == pieceBytes.length ? loweredBytes : pieceBytes;
                if (offset + chosen.length > aligned.length) {
                    return original.clone();
                }
                System.arraycopy(chosen, 0, aligned, offset, chosen.length);
                offset += chosen.length;
                i = next;
            }
            return offset == aligned.length ? aligned : original.clone();
        }

        /** Resolves the configured charset name, reporting failure as the checked exception callers already handle. */
        private static Charset resolveCharset() throws UnsupportedEncodingException {
            String name = charset;
            try {
                return Charset.forName(name);
            } catch (IllegalArgumentException e) {
                UnsupportedEncodingException unsupported = new UnsupportedEncodingException(String.valueOf(name));
                unsupported.initCause(e);
                throw unsupported;
            }
        }

        /** Builds the {@code islegal}/{@code deststr} result object. */
        private static JSONObject buildResult(String isLegal, String destStr) {
            JSONObject json = new JSONObject();
            try {
                json.put("islegal", isLegal);
                json.put("deststr", destStr);
            } catch (JSONException e) {
                // Not expected for constant keys and String values.
                throw new IllegalStateException("Failed to build search result", e);
            }
            return json;
        }
    } 
    import java.util.ArrayList;
    import java.util.List;
    
    /**
     * One node of the keyword tree: a fixed table of 256 child slots, one per
     * possible byte value, plus a flag marking the end of a keyword.
     */
    public class TreeNode {
        /** Number of child slots; one for every unsigned byte value. */
        private static final int NODE_LEN = 256;

        /** {@code true} when a keyword ends at this node; {@code false} otherwise. */
        private boolean terminal = false;

        /** Child table, pre-filled with {@code null} so any slot can be set directly. */
        private List<TreeNode> children = new ArrayList<TreeNode>(NODE_LEN);

        /** Creates a node with all 256 child slots empty and the end flag cleared. */
        public TreeNode() {
            while (children.size() < NODE_LEN) {
                children.add(null);
            }
        }

        /**
         * Stores a child node in the given slot, replacing whatever was there.
         *
         * @param index slot number, 0 to 255
         * @param node child to store
         */
        public void setSubNode(int index, TreeNode node) {
            children.set(index, node);
        }

        /**
         * Returns the child stored in the given slot.
         *
         * @param index slot number, 0 to 255
         * @return the child node, or {@code null} when the slot is empty
         */
        public TreeNode getSubNode(int index) {
            TreeNode child = children.get(index);
            return child;
        }

        /** Returns whether a keyword ends at this node. */
        public boolean isKeywordEnd() {
            return terminal;
        }

        /** Marks or unmarks this node as the end of a keyword. */
        public void setKeywordEnd(boolean end) {
            terminal = end;
        }
    } 
  • 相关阅读:
    OpenCV中的绘图函数
    整理不错的opencv博客
    opencv中的函数
    这是一个学习前端技术的网站
    HDU1520 Anniversary party(树形DP入门)
    CF1255C League of Leesins(图论)
    HDU4725 The Shortest Path in Nya Graph(最短路分层)
    1288C Two Arrays
    CF1294D MEX maximizing
    CF1295C Obtain the String
  • 原文地址:https://www.cnblogs.com/ld-swust/p/6132861.html
Copyright © 2011-2022 走看看