zoukankan      html  css  js  c++  java
  • java敏感字查找和替换

    java类:

    • SearchNode
    • SensitiveWords
    • SensitiveWordsReplace
    • SensitiveWordsSearch
    • WordsNode
    • TestSensitiveWordsSearch (测试类)
    /**
     * One keyword occurrence located inside a scanned text.
     *
     * <p>Holds the matched keyword, the half-open character range
     * {@code [index, lastIndex)} it occupies in the text, and the id of the
     * dictionary entry that produced the match.
     *
     * @date 2020-12-10 010 13:28
     */
    public class SearchNode {

        /** Matched keyword text. */
        private String words;
        /** Offset of the first matched character in the scanned text. */
        private int index;
        /** Offset just past the last matched character. */
        private int lastIndex;
        /** Id of the matched dictionary entry; stays 0 when none is supplied. */
        private long id;

        /** Creates an empty node; every field keeps its Java default value. */
        public SearchNode() {
        }

        /**
         * Creates a node without a dictionary id (the id stays 0).
         *
         * @param words     matched keyword
         * @param index     start offset of the match
         * @param lastIndex offset just past the end of the match
         */
        public SearchNode(String words, int index, int lastIndex) {
            this(words, index, lastIndex, 0L);
        }

        /**
         * Creates a fully populated node.
         *
         * @param words     matched keyword
         * @param index     start offset of the match
         * @param lastIndex offset just past the end of the match
         * @param id        id of the matched dictionary entry
         */
        public SearchNode(String words, int index, int lastIndex, long id) {
            setWords(words);
            setIndex(index);
            setLastIndex(lastIndex);
            setId(id);
        }

        /** @return the matched keyword */
        public String getWords() {
            return this.words;
        }

        /** @param words the matched keyword */
        public final void setWords(String words) {
            this.words = words;
        }

        /** @return start offset of the match */
        public int getIndex() {
            return this.index;
        }

        /** @param index start offset of the match */
        public final void setIndex(int index) {
            this.index = index;
        }

        /** @return offset just past the end of the match */
        public int getLastIndex() {
            return this.lastIndex;
        }

        /** @param lastIndex offset just past the end of the match */
        public final void setLastIndex(int lastIndex) {
            this.lastIndex = lastIndex;
        }

        /** @return id of the matched dictionary entry */
        public long getId() {
            return this.id;
        }

        /** @param id id of the matched dictionary entry */
        public final void setId(long id) {
            this.id = id;
        }
    }
    /**
     * A dictionary entry: a keyword to look for and the text that replaces it.
     *
     * @date 2020-12-10 010 13:53
     */
    public class SensitiveWords {

        /** Identifier of this entry. */
        private long id;
        /** Keyword to search for. */
        private String words;
        /** Text substituted for the keyword. */
        private String replace;

        /** Creates an empty entry; every field keeps its Java default value. */
        public SensitiveWords() {
        }

        /**
         * Creates a fully populated entry.
         *
         * @param id      identifier of the entry
         * @param words   keyword to search for
         * @param replace text substituted for the keyword
         */
        public SensitiveWords(long id, String words, String replace) {
            setId(id);
            setWords(words);
            setReplace(replace);
        }

        /** @return identifier of this entry */
        public long getId() {
            return this.id;
        }

        /** @param id identifier of this entry */
        public final void setId(long id) {
            this.id = id;
        }

        /** @return keyword to search for */
        public String getWords() {
            return this.words;
        }

        /** @param words keyword to search for */
        public final void setWords(String words) {
            this.words = words;
        }

        /** @return text substituted for the keyword */
        public String getReplace() {
            return this.replace;
        }

        /** @param replace text substituted for the keyword */
        public final void setReplace(String replace) {
            this.replace = replace;
        }
    }
    /**
     * Replaces keywords found by {@link SensitiveWordsSearch} with the
     * replacement text configured for them.
     *
     * <p>{@link #init(List)} only loads the id-to-entry lookup used here; the
     * search singleton has to be loaded separately through
     * {@code SensitiveWordsSearch.getInstance().updateKeywords(...)}.
     *
     * @date 2020-12-09 009 19:51
     */
    public class SensitiveWordsReplace {

        /**
         * All dictionary entries, as last passed to {@link #init(List)}.
         */
        protected static List<SensitiveWords> sensitiveWordsList;
        /**
         * Dictionary entries keyed by id; {@code null} until {@link #init(List)} runs.
         */
        protected static Map<Long, SensitiveWords> sensitiveWordsMap;

        /**
         * Loads the replacement dictionary.
         *
         * @param sensitiveWordsList dictionary entries; {@code null} elements are skipped
         * @throws NullPointerException if {@code sensitiveWordsList} is {@code null}
         */
        public static void init(List<SensitiveWords> sensitiveWordsList) {
            if (sensitiveWordsList == null) {
                throw new NullPointerException("sensitiveWordsList must not be null");
            }
            // Fill a local map first so the static field never exposes a half-populated map.
            Map<Long, SensitiveWords> byId = new HashMap<>((int) (sensitiveWordsList.size() / 0.75f) + 1);
            for (SensitiveWords sensitiveWords : sensitiveWordsList) {
                if (sensitiveWords != null) {
                    byId.put(sensitiveWords.getId(), sensitiveWords);
                }
            }
            SensitiveWordsReplace.sensitiveWordsList = sensitiveWordsList;
            SensitiveWordsReplace.sensitiveWordsMap = byId;
        }

        /**
         * Returns {@code text} with every keyword occurrence replaced.
         *
         * <p>An occurrence whose id has no dictionary entry, or whose entry has
         * no replacement text, is masked with one {@code *} per matched character.
         *
         * @param text text to process; {@code null} is returned unchanged
         * @return the processed text, or {@code text} itself when nothing matched
         */
        public static String findReplace(String text) {
            if (text == null) {
                return null;
            }
            // Replacement relies on longest-match results, hence maxMatch = true.
            List<SearchNode> searchNodeList = SensitiveWordsSearch.getInstance().findWords(text, true);
            if (searchNodeList == null || searchNodeList.isEmpty()) {
                return text;
            }
            Map<Integer, SearchNode> matchByStart = new HashMap<>((int) (searchNodeList.size() / 0.75f) + 1);
            for (SearchNode searchNode : searchNodeList) {
                matchByStart.put(searchNode.getIndex(), searchNode);
            }
            // Snapshot the static map once; it is null when init(...) was never called.
            final Map<Long, SensitiveWords> replacements = SensitiveWordsReplace.sensitiveWordsMap;
            final int length = text.length();
            StringBuilder builder = new StringBuilder(length);
            int i = 0;
            while (i < length) {
                SearchNode match = matchByStart.get(i);
                // A match that does not extend past i is ignored so the cursor always advances.
                if (match == null || match.getLastIndex() <= i) {
                    builder.append(text.charAt(i));
                    i++;
                    continue;
                }
                SensitiveWords entry = replacements == null ? null : replacements.get(match.getId());
                String replacement = entry == null ? null : entry.getReplace();
                if (replacement != null) {
                    builder.append(replacement);
                } else {
                    // No usable replacement: mask the matched span instead of emitting "null".
                    for (int j = i; j < match.getLastIndex(); j++) {
                        builder.append('*');
                    }
                }
                i = match.getLastIndex();
            }
            return builder.toString();
        }
    }
    /**
     * Singleton keyword scanner backed by a character trie of {@link WordsNode}s.
     *
     * <p>Load keywords with {@link #updateKeywords(List)} before calling
     * {@link #findWords(String, boolean)}.
     *
     * @date 2020-12-09 009 19:36
     */
    public class SensitiveWordsSearch {

        /**
         * Root of the keyword trie; {@code null} until the first successful load.
         * A new trie is assigned only after it has been completely built.
         */
        protected volatile WordsNode rootNode;
        /**
         * Keyword list most recently passed to {@link #updateKeywords(List)}.
         */
        protected List<SensitiveWords> sensitiveWordsList;
        /**
         * {@code true} while a keyword load is running; a best-effort guard
         * that makes overlapping {@link #updateKeywords(List)} calls return early.
         */
        protected volatile boolean keywordsLoading;

        private SensitiveWordsSearch() {
        }

        /**
         * Returns the shared instance.
         *
         * @return SensitiveWordsSearch
         */
        public static SensitiveWordsSearch getInstance() {
            return SensitiveWordsSearchInstance.INSTANCE;
        }

        /**
         * Builds a fresh trie from {@link #sensitiveWordsList} and publishes it
         * as {@link #rootNode}. Null entries and null/empty keywords are skipped.
         */
        private void initKeywords() {
            // Build into a local root so readers never observe a partially filled trie.
            final WordsNode root = new WordsNode();
            for (SensitiveWords sensitiveWords : sensitiveWordsList) {
                if (sensitiveWords == null) {
                    continue;
                }
                String words = sensitiveWords.getWords();
                if (words == null || words.isEmpty()) {
                    continue;
                }
                WordsNode node = root;
                int length = words.length();
                for (int i = 0; i < length; i++) {
                    node = node.add(words.charAt(i));
                    // layer is the node's depth; a freshly created node still has layer 0.
                    if (node.getLayer() == 0) {
                        node.setLayer(i + 1);
                    }
                }
                node.setEnd(true);
                node.setId(sensitiveWords.getId());
            }
            rootNode = root;
        }

        /**
         * Replaces the keyword set. Returns without doing anything when another
         * load is already marked as running.
         *
         * @param sensitiveWordsList keyword entries
         * @throws NullPointerException if {@code sensitiveWordsList} is {@code null}
         */
        public void updateKeywords(List<SensitiveWords> sensitiveWordsList) {
            if (sensitiveWordsList == null) {
                throw new NullPointerException("sensitiveWordsList must not be null");
            }
            if (this.keywordsLoading) {
                return;
            }
            this.keywordsLoading = true;
            try {
                this.sensitiveWordsList = sensitiveWordsList;
                this.initKeywords();
            } finally {
                // Always clear the flag; otherwise one failed load would block every later update.
                this.keywordsLoading = false;
            }
        }

        /**
         * Finds keyword occurrences in {@code text}.
         *
         * <p>With {@code maxMatch} set, the text is scanned left to right and at
         * each position the longest keyword starting there is reported; scanning
         * then resumes right after it, so the reported matches never overlap.
         * Without {@code maxMatch}, every keyword occurrence at every start
         * position is reported, including overlapping ones.
         *
         * <p>Each result carries the matched text, the half-open range
         * {@code [index, lastIndex)} and the id stored on the keyword's terminal node.
         * Results are ordered by start position.
         *
         * @param text     text to scan; {@code null} or empty yields an empty list
         * @param maxMatch {@code true} for non-overlapping longest matches
         * @return the occurrences found, never {@code null}
         * @throws IllegalStateException if no keywords have been loaded yet
         */
        public List<SearchNode> findWords(String text, boolean maxMatch) {
            // Read the volatile field once so the whole scan uses a single trie.
            final WordsNode root = rootNode;
            if (null == root) {
                throw new IllegalStateException("SensitiveWordsSearch uninitialized.");
            }
            List<SearchNode> list = new ArrayList<>();
            if (text == null || text.isEmpty()) {
                return list;
            }
            final int length = text.length();
            int start = 0;
            while (start < length) {
                WordsNode longest = null;
                int longestEnd = -1;
                WordsNode node = root.getNode(text.charAt(start));
                // Invariant: node is the trie node reached by text[start, end).
                for (int end = start + 1; node != null; end++) {
                    if (node.isEnd()) {
                        if (maxMatch) {
                            // Remember it; a longer keyword may still follow on this path.
                            longest = node;
                            longestEnd = end;
                        } else {
                            list.add(new SearchNode(text.substring(start, end), start, end, node.getId()));
                        }
                    }
                    if (end >= length) {
                        break;
                    }
                    node = node.getNode(text.charAt(end));
                }
                if (longest != null) {
                    list.add(new SearchNode(text.substring(start, longestEnd), start, longestEnd, longest.getId()));
                    start = longestEnd;
                } else {
                    // Nothing (more) to skip: retry from the very next character so that
                    // a failed partial match cannot hide a keyword starting inside it.
                    start++;
                }
            }
            return list;
        }

        /**
         * Holder class for the singleton; the instance is created when this
         * class is initialized.
         */
        private static class SensitiveWordsSearchInstance {
            /**
             * The shared instance.
             */
            private static final SensitiveWordsSearch INSTANCE = new SensitiveWordsSearch();
        }
    }
    /**
     * @date 2020-12-09 009 19:29
     */
    public class WordsNode {
    
        private int layer;
        private boolean end;
        private char c;
        private long id;
        private Map<Character, WordsNode> nodeMap;
        private WordsNode parent;
    
        public WordsNode() {
            nodeMap = new HashMap<>(16);
        }
    
        /**
         * 新增字符
         *
         * @param c c
         * @return WordsNode
         */
        public WordsNode add(final Character c) {
            if (nodeMap.containsKey(c)) {
                return nodeMap.get(c);
            }
            final WordsNode node = new WordsNode();
            node.parent = this;
            node.c = c;
            nodeMap.put(c, node);
            return node;
        }
    
        public boolean hasKey(final char c) {
            return nodeMap.containsKey(c);
        }
    
        public WordsNode getNode(final char c) {
            return nodeMap.get(c);
        }
    
        /**
         * 获取当前节点的文本
         *
         * @return String
         */
        public String getWords() {
            if ('u0000' == this.c) {
                return "";
            }
            List<String> words = new ArrayList<>(this.layer);
            words.add(String.valueOf(this.c));
            if (null != this.parent) {
                words.add(this.parent.getWords());
            }
            Collections.reverse(words);
            StringBuilder builder = new StringBuilder();
            for (String word : words) {
                builder.append(word);
            }
            return builder.toString();
        }
    
        public int getLayer() {
            return layer;
        }
    
        public void setLayer(int layer) {
            this.layer = layer;
        }
    
        public boolean isEnd() {
            return end;
        }
    
        public void setEnd(boolean end) {
            this.end = end;
        }
    
        public char getC() {
            return c;
        }
    
        public void setC(char c) {
            this.c = c;
        }
    
        public long getId() {
            return id;
        }
    
        public void setId(long id) {
            this.id = id;
        }
    
        public Map<Character, WordsNode> getNodeMap() {
            return nodeMap;
        }
    
        public void setNodeMap(Map<Character, WordsNode> nodeMap) {
            this.nodeMap = nodeMap;
        }
    
        public WordsNode getParent() {
            return parent;
        }
    
        public void setParent(WordsNode parent) {
            this.parent = parent;
        }
    }
    /**
     * @date 2020-12-10 010 10:15
     */
    public class TestSensitiveWordsSearch {
    
    
        @Test
        public void init() {
            SensitiveWordsSearch instance = SensitiveWordsSearch.getInstance();
    
            List<SensitiveWords> sensitiveWordsList = new ArrayList<>();
            sensitiveWordsList.add(new SensitiveWords(1L, "凌晨两点", "丑时三刻"));
            sensitiveWordsList.add(new SensitiveWords(2L, "国庆", "庆国"));
            sensitiveWordsList.add(new SensitiveWords(3L, "阅兵", "大阅"));
            sensitiveWordsList.add(new SensitiveWords(4L, "七点", "辰时"));
            sensitiveWordsList.add(new SensitiveWords(5L, "战地", "战场"));
            sensitiveWordsList.add(new SensitiveWords(6L, "维和军士", "和平使者"));
            sensitiveWordsList.add(new SensitiveWords(7L, "特警", "使者"));
            sensitiveWordsList.add(new SensitiveWords(8L, "小说", "软文"));
            instance.updateKeywords(sensitiveWordsList);
    
            System.out.println("已加载关键词:");
            System.out.println(JSON.toJSONString(sensitiveWordsList));
    
            String text = "凌晨两点毫无睡意,受国庆阅兵影响,七点爬起来看《白色橄榄树》这部小说。无论怎样评价玖月晞,战地记者与维和军士的配置简直招架不住。阿瓒又成为所看过小说里难以忘怀的一个名字,他是柔和淡定的排弹士兵,是机场突然出现的特警,也是在东国苏睿城郊5秒救下她的人。玖月晞轻描淡写,出来的故事却刻骨铭心,真的很喜欢这样力度的小说。";
            System.out.println("查询文本:
    " + text);
    
            SensitiveWordsReplace.init(sensitiveWordsList);
            String replace = SensitiveWordsReplace.findReplace(text);
            System.out.println(replace);
        }
    }

    结果对照:

    查询文本:
    凌晨两点毫无睡意,受国庆阅兵影响,七点爬起来看《白色橄榄树》这部小说。无论怎样评价玖月晞,战地记者与维和军士的配置简直招架不住。阿瓒又成为所看过小说里难以忘怀的一个名字,他是柔和淡定的排弹士兵,是机场突然出现的特警,也是在东国苏睿城郊5秒救下她的人。玖月晞轻描淡写,出来的故事却刻骨铭心,真的很喜欢这样力度的小说。
    丑时三刻毫无睡意,受庆国大阅影响,辰时爬起来看《白色橄榄树》这部软文。无论怎样评价玖月晞,战场记者与和平使者的配置简直招架不住。阿瓒又成为所看过软文里难以忘怀的一个名字,他是柔和淡定的排弹士兵,是机场突然出现的使者,也是在东国苏睿城郊5秒救下她的人。玖月晞轻描淡写,出来的故事却刻骨铭心,真的很喜欢这样力度的软文。
  • 相关阅读:
    How to function call using 'this' inside forEach loop
    jquery.validate.unobtrusive not working with dynamic injected elements
    Difference between jQuery.extend and jQuery.fn.extend?
    Methods, Computed, and Watchers in Vue.js
    Caution using watchers for objects in Vue
    How to Watch Deep Data Structures in Vue (Arrays and Objects)
    Page: DOMContentLoaded, load, beforeunload, unload
    linux bridge
    linux bridge
    EVE-NG网卡桥接
  • 原文地址:https://www.cnblogs.com/se7end/p/14122924.html
Copyright © 2011-2022 走看看