zoukankan      html  css  js  c++  java
  • java 敏感字过滤

      1 import java.util.ArrayList;
      2 import java.util.Arrays;
      3 import java.util.HashMap;
      4 import java.util.Iterator;
      5 import java.util.List;
      6 import java.util.Map;
      7 import java.util.Map.Entry;
      8 /**
      9  * 过滤敏感词,并把敏感词替换成*
     10  * 
     11  */
     12 public class SensitiveWordUtils {
     13 
     14     //敏感词库
     15     static final String keysContent = "@要过滤的字";
     49     static String[] keys = null;
     50 
     51     static ArrayList<String> first = new ArrayList<String>();
     52     static String[] sortFirst;
     53     static char[] charFirst;
     54     static HashMap<String, ArrayList<String>> map = new HashMap<String, ArrayList<String>>();
     55     static HashMap<String, String[]> sortMap = new HashMap<String, String[]>();
     56     static HashMap<String, char[]> charMap = new HashMap<String, char[]>();
     57 
     58     static ArrayList<String> temp;
     59     static String key, value;
     60     int length;
     61 
     62 
     63     /*
     64      * 静态代码块只会被执行一次 用来注册敏感词
     65      */
     66     static {
     67         keys = keysContent.split("@");
     68     }
     69 
     70     /**
     71      * 带参数的构造函数
     72      * 
     73      * @param keys
     74      *            敏感词
     75      * @param tContent
     76      *            需要过滤的内容
     77      */
     78     public SensitiveWordUtils(String tContent) {
     79         for (String k : keys) {
     80             if (!first.contains(k.substring(0, 1))) {
     81                 first.add(k.substring(0, 1));
     82             }
     83             length = k.length();
     84             for (int i = 1; i < length; i++) {
     85                 key = k.substring(0, i);
     86                 value = k.substring(i, i + 1);
     87                 if (i == 1 && !first.contains(key)) {
     88                     first.add(key);
     89                 }
     90 
     91                 // 有,添加
     92                 if (map.containsKey(key)) {
     93                     if (!map.get(key).contains(value)) {
     94                         map.get(key).add(value);
     95                     }
     96                 }
     97                 // 没有添加
     98                 else {
     99                     temp = new ArrayList<String>();
    100                     temp.add(value);
    101                     map.put(key, temp);
    102                 }
    103             }
    104         }
    105         sortFirst = first.toArray(new String[first.size()]);
    106         Arrays.sort(sortFirst); // 排序
    107 
    108         charFirst = new char[first.size()];
    109         for (int i = 0; i < charFirst.length; i++) {
    110             charFirst[i] = first.get(i).charAt(0);
    111         }
    112         Arrays.sort(charFirst); // 排序
    113 
    114         String[] sortValue;
    115         ArrayList<String> v;
    116         Map.Entry<String, ArrayList<String>> entry;
    117         Iterator<Entry<String, ArrayList<String>>> iter = map.entrySet()
    118         .iterator();
    119         while (iter.hasNext()) {
    120             entry = (Map.Entry<String, ArrayList<String>>) iter.next();
    121             v = (ArrayList<String>) entry.getValue();
    122             sortValue = v.toArray(new String[v.size()]);
    123             Arrays.sort(sortValue); // 排序
    124             sortMap.put(entry.getKey(), sortValue);
    125         }
    126 
    127         char[] charValue;
    128         iter = map.entrySet().iterator();
    129         while (iter.hasNext()) {
    130             entry = (Map.Entry<String, ArrayList<String>>) iter.next();
    131             v = (ArrayList<String>) entry.getValue();
    132             charValue = new char[v.size()];
    133             for (int i = 0; i < charValue.length; i++) {
    134                 charValue[i] = v.get(i).charAt(0);
    135             }
    136             Arrays.sort(charValue); // 排序
    137             charMap.put(entry.getKey(), charValue);
    138         }
    139     }
    140     /**
    141      * 把敏感词替换成*
    142      * 
    143      * @param content
    144      *            需要过滤的内容
    145      * @return 过滤完后的符合要求的内容
    146      */
    147     public String replace(String content) {
    148         String r = null, f, c = content;
    149         String replacedword = content;
    150         char g;
    151         char[] temps;
    152         int length = c.length();
    153         for (int i = 0; i < length - 1; i++) {
    154             g = c.charAt(i);
    155             // 二分查找
    156             if (Arrays.binarySearch(charFirst, g) > -1) {
    157                 tag : for (int j = i + 1; j < length; j++) {
    158                     f = c.substring(i, j);
    159                     g = c.charAt(j);
    160                     temps = charMap.get(f);
    161                     if (temps == null) { // 找到了
    162                         //System.out.println("ok");
    163                         r = f;
    164                         String str = "";
    165                         for (int m = 1; m <= r.length(); m++) {
    166                             str = str + "*";
    167                         }
    168                         replacedword = c.replace(r, str);
    169                         c = replacedword;
    170                         break tag;
    171                     }
    172                     // 二分查找
    173                     if (Arrays.binarySearch(temps, g) > -1) {
    174                         if (j == length - 1) {
    175                             // print("find!");
    176                             //System.out.println("find!");
    177                             r = c.substring(i, j + 1);
    178                             String str = "";
    179                             for (int m = 1; m <= r.length(); m++) {
    180                                 str = str + "*";
    181                             }
    182                             replacedword = c.replace(r, str);
    183                             c = replacedword;
    184                             break tag;
    185                         }
    186                     } else { // 没有找到了
    187                         break;
    188                     }
    189                 }
    190             }
    191         }
    192         return replacedword;
    193     }
    194 }
     1 import java.util.ArrayList;
     2 import java.util.List;
     3 
     4 
     5 public class TestMGC {
     6 
     7     public static void main(String[] args) {
     8         //被过滤内容
     9         String contentStr = "法律严禁传播色情";
    10         
    11         SensitiveWordUtils swu = new SensitiveWordUtils(contentStr);
    12         System.out.println(swu.replace(contentStr));
    13     }
    14 }

    原文地址: http://www.oschina.net/code/snippent_813213_14355

  • 相关阅读:
    0--分析JDK及其他开源框架的初衷
    2.2--RandomAccessFile实现类和它的关联类FileChannel
    2.1--RandomAccessFile实现类的关联类FileChannel类图
    2--IO包DataInput接口类图
    1--UML语言IO包组件图
    跟老齐学Python Django实战 5/n Second edition
    跟老齐学Python Django实战 3/n Second edition
    Vim
    跟老齐学Python Django实战 2/n Second edition
    跟老齐学Python Django实战 1/n Second edition
  • 原文地址:https://www.cnblogs.com/yixiwenwen/p/2730686.html
Copyright © 2011-2022 走看看