1 import java.util.HashSet;
2 import java.util.Iterator;
3 import java.util.Map;
4 import java.util.Set;
5
6 /**
7 * 敏感词过滤
8 */
9 public class SensitivewordFilter {
10 private Map sensitiveWordMap = null;
11 public static int minMatchTYpe = 1; //最小匹配规则
12 public static int maxMatchType = 2; //最大匹配规则
13 private static String replaceString = null;
14 /**例如:敏感词中含有中国人、中国
15 * 最小匹配规则minMatchTYpe为1时,会匹配出**人,为2时,会匹配出***
16 * */
17 public static void main(String[] args) throws Exception{
18 SensitivewordFilter filter = new SensitivewordFilter();
19 System.out.println("敏感词的数量:" + filter.sensitiveWordMap.size());
20 String string = "dfa是面向三级装配的设计(Design for assembly)的英文简称,是指在产品设计阶段设计产品使得产品具有良好" +
21 "的可装配性,确保装配工序简单、装配效率高、装配质量高、装配不良率低和装配成本低。面向装配的设计通过一系" +
22 "列有利于装配的设计指南例如简化产品设计、减少零件数量等,女女并同装配工程师一起合作,被逼简化产品结构,近親使其便于" +
23 "装配,为提高产品质量、缩短产品开发周期和降低产品成本奠定基础";
24 // ------获取敏感词---------
25 Set<String> set = filter.getSensitiveWord(string, 1);
26 System.out.println("含敏感词的个数为:" + set.size() + "。包含:" + set);
27 // ------------------------替换敏感字begin----------------------
28 Iterator<String> iterator = set.iterator();
29 String word = null;
30 while (iterator.hasNext()) {
31 word = iterator.next();
32 /**
33 * 得到word中敏感关键词被替换后的字符串,例如:***
34 * */
35 getReplaceCharsS("*", word.length());
36 /**
37 * 将原字符串中的敏感关键词替换成带有replaceChar
38 * 或全部为replaceChar的关键词
39 * */
40 string = string.replaceAll(word, replaceString);
41 }
42 // ------------------------替换敏感字end----------------------
43 System.out.println(string);
44 }
45
46 /**
47 * 构造函数,初始化敏感词库
48 */
49 public SensitivewordFilter(){
50 sensitiveWordMap = new SensitiveWordInit().initKeyWord();
51 }
52
53 /**
54 * 判断文字是否包含敏感字符
55 * @param matchType 匹配规则 1:最小匹配规则,2:最大匹配规则
56 */
57 public boolean isContaintSensitiveWord(String txt,int matchType){
58 boolean flag = false;
59 for(int i = 0 ; i < txt.length() ; i++){
60 int matchFlag = this.CheckSensitiveWord(txt, i, matchType); //判断是否包含敏感字符
61 if(matchFlag > 0){ //大于0存在,返回true
62 flag = true;
63 }
64 }
65 return flag;
66 }
67
68 /**
69 * 获取文字中的敏感词
70 * @param matchType 匹配规则 1:最小匹配规则,2:最大匹配规则
71 */
72 public Set<String> getSensitiveWord(String txt , int matchType){
73 Set<String> sensitiveWordList = new HashSet<String>();
74
75 for(int i = 0 ; i < txt.length() ; i++){
76 int length = CheckSensitiveWord(txt, i, matchType); //判断是否包含敏感字符
77 if(length > 0){ //存在,加入list中
78 sensitiveWordList.add(txt.substring(i, i+length));
79 i = i + length - 1; //减1的原因,是因为for会自增
80 }
81 }
82
83 return sensitiveWordList;
84 }
85
86 /**
87 * 替换敏感字字符,默认*
88 */
89 public String replaceSensitiveWord(String txt,int matchType,String replaceChar){
90 String resultTxt = txt;
91 Set<String> set = getSensitiveWord(txt, matchType); //获取所有的敏感词
92 Iterator<String> iterator = set.iterator();
93 String word = null;
94 String replaceString = null;
95 while (iterator.hasNext()) {
96 word = iterator.next();
97 replaceString = getReplaceChars(replaceChar, word.length());
98 resultTxt = resultTxt.replaceAll(word, replaceString);
99 }
100
101 return resultTxt;
102 }
103
104 /**
105 * 获取替换字符串
106 */
107 private String getReplaceChars(String replaceChar,int length){
108 String resultReplace = replaceChar;
109 for(int i = 1 ; i < length ; i++){
110 resultReplace += replaceChar;
111 }
112
113 return resultReplace;
114 }
115
116 /**
117 * 获取替换字符串,无返回值
118 */
119 private static void getReplaceCharsS(String replaceChar,int length){
120 replaceString = "";
121 String resultReplace = replaceChar;
122 for(int i = 1 ; i < length ; i++){
123 resultReplace += replaceChar;
124 }
125 replaceString = resultReplace;
126 }
127
128 /**
129 * 检查文字中是否包含敏感字符,检查规则如下:<br>
130 */
131 @SuppressWarnings({ "rawtypes"})
132 public int CheckSensitiveWord(String txt,int beginIndex,int matchType){
133 boolean flag = false; //敏感词结束标识位:用于敏感词只有1位的情况
134 int matchFlag = 0; //匹配标识数默认为0
135 char word = 0;
136 Map nowMap = sensitiveWordMap;
137 for(int i = beginIndex; i < txt.length() ; i++){
138 word = txt.charAt(i);
139 nowMap = (Map) nowMap.get(word); //获取指定key
140 if(nowMap != null){ //存在,则判断是否为最后一个
141 matchFlag++; //找到相应key,匹配标识+1
142 if("1".equals(nowMap.get("isEnd"))){ //如果为最后一个匹配规则,结束循环,返回匹配标识数
143 flag = true; //结束标志位为true
144 if(SensitivewordFilter.minMatchTYpe == matchType){ //最小规则,直接返回,最大规则还需继续查找
145 break;
146 }
147 }
148 }
149 else{ //不存在,直接返回
150 break;
151 }
152 }
153 if(matchFlag < 2 || !flag){ //长度必须大于等于1,为词
154 matchFlag = 0;
155 }
156 return matchFlag;
157 }
158
159 }