zoukankan      html  css  js  c++  java
  • java敏感字查找和替换

    java类:

    • SearchNode
    • SensitiveWords
    • SensitiveWordsReplace
    • SensitiveWordsSearch
    • WordsNode
    • TestSensitiveWordsSearch (测试类)
    /**
     * One keyword hit located inside a searched text.
     *
     * <p>Plain mutable bean. {@code SensitiveWordsSearch.findWords} stores the offset of the first
     * matched character in {@code index} and the offset just past the last matched character in
     * {@code lastIndex}.
     *
     * @date 2020-12-10 010 13:28
     */
    public class SearchNode {

        /** Text of the matched keyword. */
        private String words;
        /** Offset of the first matched character. */
        private int index;
        /** Offset one past the last matched character. */
        private int lastIndex;
        /** Identifier of the keyword that produced the hit; stays 0 when not supplied. */
        private long id;

        /** Creates an empty hit; every field keeps its Java default value. */
        public SearchNode() {
        }

        /**
         * Creates a hit without a keyword id (the id stays 0).
         *
         * @param words     matched keyword text
         * @param index     offset of the first matched character
         * @param lastIndex offset one past the last matched character
         */
        public SearchNode(String words, int index, int lastIndex) {
            this(words, index, lastIndex, 0L);
        }

        /**
         * Creates a fully populated hit.
         *
         * @param words     matched keyword text
         * @param index     offset of the first matched character
         * @param lastIndex offset one past the last matched character
         * @param id        identifier of the matched keyword
         */
        public SearchNode(String words, int index, int lastIndex, long id) {
            this.id = id;
            this.lastIndex = lastIndex;
            this.index = index;
            this.words = words;
        }

        /** @return identifier of the matched keyword */
        public long getId() {
            return this.id;
        }

        /** @param id identifier of the matched keyword */
        public void setId(long id) {
            this.id = id;
        }

        /** @return matched keyword text */
        public String getWords() {
            return this.words;
        }

        /** @param words matched keyword text */
        public void setWords(String words) {
            this.words = words;
        }

        /** @return offset of the first matched character */
        public int getIndex() {
            return this.index;
        }

        /** @param index offset of the first matched character */
        public void setIndex(int index) {
            this.index = index;
        }

        /** @return offset one past the last matched character */
        public int getLastIndex() {
            return this.lastIndex;
        }

        /** @param lastIndex offset one past the last matched character */
        public void setLastIndex(int lastIndex) {
            this.lastIndex = lastIndex;
        }
    }
    /**
     * A configured sensitive word together with the text that should replace it.
     *
     * <p>Plain mutable bean with no validation.
     *
     * @date 2020-12-10 010 13:53
     */
    public class SensitiveWords {

        /** Identifier of this entry. */
        private long id;
        /** The sensitive word to look for. */
        private String words;
        /** The text substituted for {@link #words}. */
        private String replace;

        /** Creates an empty entry; every field keeps its Java default value. */
        public SensitiveWords() {
        }

        /**
         * Creates a fully populated entry.
         *
         * @param id      identifier of the entry
         * @param words   the sensitive word to look for
         * @param replace the text substituted for the word
         */
        public SensitiveWords(long id, String words, String replace) {
            this.replace = replace;
            this.words = words;
            this.id = id;
        }

        /** @return the text substituted for the word */
        public String getReplace() {
            return this.replace;
        }

        /** @param replace the text substituted for the word */
        public void setReplace(String replace) {
            this.replace = replace;
        }

        /** @return the sensitive word to look for */
        public String getWords() {
            return this.words;
        }

        /** @param words the sensitive word to look for */
        public void setWords(String words) {
            this.words = words;
        }

        /** @return identifier of the entry */
        public long getId() {
            return this.id;
        }

        /** @param id identifier of the entry */
        public void setId(long id) {
            this.id = id;
        }
    }
    /**
     * Replaces sensitive words found in a text with their configured substitutes.
     *
     * <p>Matching is delegated to the {@code SensitiveWordsSearch} singleton, which has to be loaded
     * separately through its {@code updateKeywords} method; {@link #init(List)} only builds the
     * id-to-entry lookup used to choose the replacement text.
     *
     * @date 2020-12-09 009 19:51
     */
    public class SensitiveWordsReplace {

        /** Character written once per matched character when a hit has no usable replacement. */
        private static final char MASK = '*';

        /**
         * All configured entries, exactly as last passed to {@link #init(List)}.
         */
        protected static List<SensitiveWords> sensitiveWordsList;
        /**
         * Entries indexed by id. Starts out empty so that {@link #findReplace(String)} can run
         * (masking every hit) even before {@link #init(List)} has been called.
         */
        protected static Map<Long, SensitiveWords> sensitiveWordsMap = new HashMap<>();

        /**
         * Loads the entries whose replacement text {@link #findReplace(String)} should use.
         *
         * <p>The lookup is built completely before it is stored, so the static field never refers to
         * a half-filled map. When several entries share an id, the last one wins.
         *
         * @param sensitiveWordsList entries to index by id; {@code null} is treated as an empty list
         *                           and {@code null} elements are skipped
         */
        public static void init(List<SensitiveWords> sensitiveWordsList) {
            int expected = null == sensitiveWordsList ? 0 : sensitiveWordsList.size();
            Map<Long, SensitiveWords> byId = new HashMap<>(expected);
            if (null != sensitiveWordsList) {
                for (SensitiveWords entry : sensitiveWordsList) {
                    if (null != entry) {
                        byId.put(entry.getId(), entry);
                    }
                }
            }
            SensitiveWordsReplace.sensitiveWordsList = sensitiveWordsList;
            SensitiveWordsReplace.sensitiveWordsMap = byId;
        }

        /**
         * Returns {@code text} with every sensitive word replaced.
         *
         * <p>Hits come from {@code SensitiveWordsSearch.findWords(text, true)}. A hit whose id maps
         * to an entry with a non-null replacement is substituted by that replacement; any other hit
         * is masked with one {@code *} per matched character.
         *
         * @param text text to filter; {@code null} is returned unchanged
         * @return the filtered text, or {@code text} itself when nothing matched
         */
        public static String findReplace(String text) {
            if (null == text) {
                return null;
            }
            // Replacement relies on the longest-match mode of the search.
            List<SearchNode> hits = SensitiveWordsSearch.getInstance().findWords(text, true);
            if (null == hits || hits.isEmpty()) {
                return text;
            }
            // Read the lookup once so a concurrent init() cannot swap it in the middle of a pass.
            Map<Long, SensitiveWords> lookup = SensitiveWordsReplace.sensitiveWordsMap;
            Map<Integer, SearchNode> hitsByStart = new HashMap<>(hits.size());
            for (SearchNode hit : hits) {
                hitsByStart.put(hit.getIndex(), hit);
            }
            int length = text.length();
            StringBuilder builder = new StringBuilder(length);
            int i = 0;
            while (i < length) {
                SearchNode hit = hitsByStart.get(i);
                // A hit that does not end after its start would never advance the cursor;
                // copy the character instead of looping forever.
                if (null == hit || hit.getLastIndex() <= i) {
                    builder.append(text.charAt(i));
                    i++;
                    continue;
                }
                SensitiveWords entry = null == lookup ? null : lookup.get(hit.getId());
                String replacement = null == entry ? null : entry.getReplace();
                if (null != replacement) {
                    builder.append(replacement);
                } else {
                    // Unknown id or missing replacement: mask instead of appending "null".
                    for (int j = i; j < hit.getLastIndex(); j++) {
                        builder.append(MASK);
                    }
                }
                i = hit.getLastIndex();
            }
            return builder.toString();
        }
    }
    /**
     * Singleton keyword matcher backed by a character trie of {@code WordsNode}s.
     *
     * <p>Load the keywords with {@link #updateKeywords(List)} and then scan texts with
     * {@link #findWords(String, boolean)}.
     *
     * @date 2020-12-09 009 19:36
     */
    public class SensitiveWordsSearch {

        /**
         * Root of the keyword trie; {@code null} until keywords have been loaded. Volatile because
         * a finished trie is swapped in by {@link #updateKeywords(List)} while readers may be scanning.
         */
        protected volatile WordsNode rootNode;
        /**
         * All keywords, as last passed to {@link #updateKeywords(List)}.
         */
        protected List<SensitiveWords> sensitiveWordsList;
        /**
         * {@code true} while a keyword load is running. Best-effort guard only: the check-then-set
         * in {@link #updateKeywords(List)} is not atomic.
         */
        protected volatile boolean keywordsLoading;

        private SensitiveWordsSearch() {
        }

        /**
         * Returns the shared instance.
         *
         * @return SensitiveWordsSearch
         */
        public static SensitiveWordsSearch getInstance() {
            return SensitiveWordsSearchInstance.INSTANCE;
        }

        /**
         * Builds a trie from {@link #sensitiveWordsList} and publishes it as {@link #rootNode}.
         *
         * <p>The trie is assembled in a local variable and assigned only when complete, so a
         * concurrent {@link #findWords(String, boolean)} sees either the previous trie or the new
         * one, never a partially filled one. {@code null} entries and entries whose word is
         * {@code null} or empty are skipped.
         */
        private void initKeywords() {
            WordsNode root = new WordsNode();
            List<SensitiveWords> source = this.sensitiveWordsList;
            if (null != source) {
                for (SensitiveWords entry : source) {
                    String words = null == entry ? null : entry.getWords();
                    if (null == words || words.isEmpty()) {
                        continue;
                    }
                    WordsNode node = root;
                    int length = words.length();
                    for (int i = 0; i < length; i++) {
                        node = node.add(words.charAt(i));
                        // A layer of 0 marks a freshly created node; record its depth once.
                        if (node.getLayer() == 0) {
                            node.setLayer(i + 1);
                        }
                    }
                    node.setEnd(true);
                    node.setId(entry.getId());
                }
            }
            this.rootNode = root;
        }

        /**
         * Replaces the loaded keywords.
         *
         * <p>The call is ignored when another load is already in progress. The loading flag is
         * cleared in a {@code finally} block so that a failed load cannot disable later updates.
         *
         * @param sensitiveWordsList keywords to load; {@code null} loads an empty keyword set
         */
        public void updateKeywords(List<SensitiveWords> sensitiveWordsList) {
            if (this.keywordsLoading) {
                return;
            }
            this.keywordsLoading = true;
            try {
                this.sensitiveWordsList = sensitiveWordsList;
                this.initKeywords();
            } finally {
                this.keywordsLoading = false;
            }
        }

        /**
         * Finds keyword occurrences in {@code text}.
         *
         * <p>With {@code maxMatch} the scan is leftmost-longest: at each start offset the longest
         * keyword beginning there is reported and scanning resumes right after it, so hits never
         * overlap. Without {@code maxMatch} every keyword occurrence at every start offset is
         * reported, ordered by start offset and then by length.
         *
         * <p>Each hit carries the keyword text, the start offset ({@code index}), the offset one past
         * the last matched character ({@code lastIndex}) and the keyword id.
         *
         * @param text     text to scan; {@code null} or empty yields an empty list
         * @param maxMatch {@code true} for longest, non-overlapping matches
         * @return the hits found; never {@code null}
         * @throws IllegalStateException if no keywords have been loaded yet
         */
        public List<SearchNode> findWords(String text, boolean maxMatch) {
            final WordsNode root = this.rootNode;
            if (null == root) {
                throw new IllegalStateException("SensitiveWordsSearch uninitialized.");
            }
            List<SearchNode> list = new ArrayList<>();
            if (null == text || text.isEmpty()) {
                return list;
            }
            final int length = text.length();
            int start = 0;
            while (start < length) {
                WordsNode node = root;
                WordsNode longest = null;
                int longestEnd = -1;
                int end = start;
                // Walk the trie along text[start..]; node always spells text[start, end).
                while (end < length) {
                    node = node.getNode(text.charAt(end));
                    if (null == node) {
                        break;
                    }
                    end++;
                    if (node.isEnd()) {
                        if (maxMatch) {
                            // Remember the keyword but keep walking for a longer one.
                            longest = node;
                            longestEnd = end;
                        } else {
                            list.add(new SearchNode(node.getWords(), start, end, node.getId()));
                        }
                    }
                }
                if (null != longest) {
                    list.add(new SearchNode(longest.getWords(), start, longestEnd, longest.getId()));
                    // Resume after the hit so reported matches do not overlap.
                    start = longestEnd;
                } else {
                    start++;
                }
            }
            return list;
        }

        /**
         * Holder class for the singleton; the instance is created when the holder is first used.
         */
        private static class SensitiveWordsSearchInstance {
            /**
             * The shared instance.
             */
            private static final SensitiveWordsSearch INSTANCE = new SensitiveWordsSearch();
        }
    }
    /**
     * @date 2020-12-09 009 19:29
     */
    public class WordsNode {
    
        private int layer;
        private boolean end;
        private char c;
        private long id;
        private Map<Character, WordsNode> nodeMap;
        private WordsNode parent;
    
        public WordsNode() {
            nodeMap = new HashMap<>(16);
        }
    
        /**
         * 新增字符
         *
         * @param c c
         * @return WordsNode
         */
        public WordsNode add(final Character c) {
            if (nodeMap.containsKey(c)) {
                return nodeMap.get(c);
            }
            final WordsNode node = new WordsNode();
            node.parent = this;
            node.c = c;
            nodeMap.put(c, node);
            return node;
        }
    
        public boolean hasKey(final char c) {
            return nodeMap.containsKey(c);
        }
    
        public WordsNode getNode(final char c) {
            return nodeMap.get(c);
        }
    
        /**
         * 获取当前节点的文本
         *
         * @return String
         */
        public String getWords() {
            if ('u0000' == this.c) {
                return "";
            }
            List<String> words = new ArrayList<>(this.layer);
            words.add(String.valueOf(this.c));
            if (null != this.parent) {
                words.add(this.parent.getWords());
            }
            Collections.reverse(words);
            StringBuilder builder = new StringBuilder();
            for (String word : words) {
                builder.append(word);
            }
            return builder.toString();
        }
    
        public int getLayer() {
            return layer;
        }
    
        public void setLayer(int layer) {
            this.layer = layer;
        }
    
        public boolean isEnd() {
            return end;
        }
    
        public void setEnd(boolean end) {
            this.end = end;
        }
    
        public char getC() {
            return c;
        }
    
        public void setC(char c) {
            this.c = c;
        }
    
        public long getId() {
            return id;
        }
    
        public void setId(long id) {
            this.id = id;
        }
    
        public Map<Character, WordsNode> getNodeMap() {
            return nodeMap;
        }
    
        public void setNodeMap(Map<Character, WordsNode> nodeMap) {
            this.nodeMap = nodeMap;
        }
    
        public WordsNode getParent() {
            return parent;
        }
    
        public void setParent(WordsNode parent) {
            this.parent = parent;
        }
    }
    /**
     * @date 2020-12-10 010 10:15
     */
    public class TestSensitiveWordsSearch {
    
    
        @Test
        public void init() {
            SensitiveWordsSearch instance = SensitiveWordsSearch.getInstance();
    
            List<SensitiveWords> sensitiveWordsList = new ArrayList<>();
            sensitiveWordsList.add(new SensitiveWords(1L, "凌晨两点", "丑时三刻"));
            sensitiveWordsList.add(new SensitiveWords(2L, "国庆", "庆国"));
            sensitiveWordsList.add(new SensitiveWords(3L, "阅兵", "大阅"));
            sensitiveWordsList.add(new SensitiveWords(4L, "七点", "辰时"));
            sensitiveWordsList.add(new SensitiveWords(5L, "战地", "战场"));
            sensitiveWordsList.add(new SensitiveWords(6L, "维和军士", "和平使者"));
            sensitiveWordsList.add(new SensitiveWords(7L, "特警", "使者"));
            sensitiveWordsList.add(new SensitiveWords(8L, "小说", "软文"));
            instance.updateKeywords(sensitiveWordsList);
    
            System.out.println("已加载关键词:");
            System.out.println(JSON.toJSONString(sensitiveWordsList));
    
            String text = "凌晨两点毫无睡意,受国庆阅兵影响,七点爬起来看《白色橄榄树》这部小说。无论怎样评价玖月晞,战地记者与维和军士的配置简直招架不住。阿瓒又成为所看过小说里难以忘怀的一个名字,他是柔和淡定的排弹士兵,是机场突然出现的特警,也是在东国苏睿城郊5秒救下她的人。玖月晞轻描淡写,出来的故事却刻骨铭心,真的很喜欢这样力度的小说。";
            System.out.println("查询文本:
    " + text);
    
            SensitiveWordsReplace.init(sensitiveWordsList);
            String replace = SensitiveWordsReplace.findReplace(text);
            System.out.println(replace);
        }
    }

    结果对照:

    查询文本:
    凌晨两点毫无睡意,受国庆阅兵影响,七点爬起来看《白色橄榄树》这部小说。无论怎样评价玖月晞,战地记者与维和军士的配置简直招架不住。阿瓒又成为所看过小说里难以忘怀的一个名字,他是柔和淡定的排弹士兵,是机场突然出现的特警,也是在东国苏睿城郊5秒救下她的人。玖月晞轻描淡写,出来的故事却刻骨铭心,真的很喜欢这样力度的小说。
    丑时三刻毫无睡意,受庆国大阅影响,辰时爬起来看《白色橄榄树》这部软文。无论怎样评价玖月晞,战场记者与和平使者的配置简直招架不住。阿瓒又成为所看过软文里难以忘怀的一个名字,他是柔和淡定的排弹士兵,是机场突然出现的使者,也是在东国苏睿城郊5秒救下她的人。玖月晞轻描淡写,出来的故事却刻骨铭心,真的很喜欢这样力度的软文。
  • 相关阅读:
    php 验证码生成方法 及使用
    idea的jar文件,“java.lang.SecurityException: Invalid signature file digest for Manifest main attribute
    Ubuntu下Java JDK安装
    Ubuntu 忘记密码
    为 ubuntu 切换更新源
    使用Java开发桌面即时通讯程序遇到的问题
    MySQL 1093
    Java中命名Dao、Bean、conn等包的含义(不定期补充)
    通过导入Jar包的方式使用JSONObject
    IM开发通信协议基础知识(一)---TCP、UDP、HTTP、SOCKET
  • 原文地址:https://www.cnblogs.com/se7end/p/14122924.html
Copyright © 2011-2022 走看看