zoukankan      html  css  js  c++  java
  • C# 添加敏感词

           /// <summary>
           /// Checks whether a piece of text contains any word from a '|'-separated
           /// sensitive-word list stored in "StreamReader.txt".
           /// </summary>
           /// <remarks>
           /// The word list and its lookup tables are static, so they are read from disk
           /// once and shared by every instance. Previously only the word set was static
           /// while the lookup tables were per-instance, which left every instance after
           /// the first with empty tables.
           /// </remarks>
           public class CheckStreamReader
        {
            // One slot per possible char value, including char.MaxValue itself.
            private const int CharTableSize = char.MaxValue + 1;

            // Guards the one-time population of the static tables below.
            private static readonly object syncRoot = new object();

            // True once at least one non-empty word has been registered.
            private static volatile bool loaded;

            // All sensitive words that are two or more characters long.
            private static HashSet<string> hash = new HashSet<string>();

            // Per-character bit mask: bit i (0..6) is set when the character occurs at
            // position i of some word; bit 7 is set when it occurs at position 7 or later.
            private static byte[] fastCheck = new byte[CharTableSize];

            // Marks characters that are, on their own, a complete one-character word.
            private static BitArray charCheck = new BitArray(CharTableSize);

            private static int maxWordLength = 0;
            private static int minWordLength = int.MaxValue;
            private static string[] badwords = { };


            /// <summary>
            /// Creates a checker, loading the word list from "~/config/StreamReader.txt"
            /// if no words have been loaded yet.
            /// </summary>
            public CheckStreamReader()
            {
                if (!loaded)
                {
                    // Load the sensitive words.
                    string path = HttpContext.Current.Server.MapPath("~/config") + "/StreamReader.txt";
                    LoadWordList(path);
                }
            }


            /// <summary>
            /// Reads the '|'-separated word list at <paramref name="path"/> and builds the
            /// lookup tables. Does nothing if another caller has already loaded a list.
            /// </summary>
            /// <param name="path">Physical path of the UTF-8 encoded word-list file.</param>
            private static void LoadWordList(string path)
            {
                lock (syncRoot)
                {
                    if (loaded)
                    {
                        return;
                    }

                    string strText;
                    // Dispose the reader so the file handle is released promptly.
                    using (StreamReader sr = new StreamReader(path, Encoding.GetEncoding("utf-8")))
                    {
                        strText = sr.ReadToEnd();
                    }

                    badwords = strText.Split('|');
                    InitializationText();

                    // Any registered word raises maxWordLength above zero. A file that
                    // yields no words leaves the flag unset so a later call retries.
                    loaded = maxWordLength > 0;
                }
            }


            /// <summary>
            /// Registers every entry of <c>badwords</c> in the lookup tables: the position
            /// masks, the single-character table, and the multi-character word set.
            /// </summary>
            private static void InitializationText()
            {
                foreach (string word in badwords)
                {
                    // Empty entries (e.g. from a trailing or doubled '|') carry no word;
                    // counting them would drag minWordLength down to 0.
                    if (string.IsNullOrEmpty(word))
                    {
                        continue;
                    }

                    maxWordLength = Math.Max(maxWordLength, word.Length);
                    minWordLength = Math.Min(minWordLength, word.Length);

                    // Positions 0..6 each get their own bit.
                    for (int i = 0; i < 7 && i < word.Length; i++)
                    {
                        fastCheck[word[i]] |= (byte)(1 << i);
                    }

                    // Positions 7 and beyond all share the top bit.
                    for (int i = 7; i < word.Length; i++)
                    {
                        fastCheck[word[i]] |= 0x80;
                    }

                    if (word.Length == 1)
                    {
                        charCheck[word[0]] = true;
                    }
                    else
                    {
                        hash.Add(word);
                    }
                }
            }

            /// <summary>
            /// Determines whether <paramref name="text"/> contains any sensitive word.
            /// </summary>
            /// <param name="text">The text to scan. Null or empty text contains no words.</param>
            /// <returns>
            /// <c>true</c> if some substring of <paramref name="text"/> equals a loaded
            /// word; otherwise <c>false</c>.
            /// </returns>
            public bool HasBadWord(string text)
            {
                if (string.IsNullOrEmpty(text))
                {
                    return false;
                }

                if (!loaded)
                {
                    // Nothing was loaded by the constructor; try the configured location.
                    string path = HttpContext.Current.Server.MapPath("~/" + ConfigurationManager.AppSettings["ConfigPath"]) + "/StreamReader.txt";
                    LoadWordList(path);
                }

                for (int start = 0; start < text.Length; start++)
                {
                    char first = text[start];

                    // Skip characters that never begin a word.
                    if ((fastCheck[first] & 1) == 0)
                    {
                        continue;
                    }

                    // A one-character word matches immediately.
                    if (charCheck[first])
                    {
                        return true;
                    }

                    // Longest candidate starting here: bounded by the longest word and
                    // by the characters remaining in the text.
                    int lastOffset = Math.Min(maxWordLength - 1, text.Length - start - 1);

                    for (int j = 1; j <= lastOffset; j++)
                    {
                        // Stop as soon as a character cannot appear at offset j of any word.
                        if ((fastCheck[text[start + j]] & (1 << Math.Min(j, 7))) == 0)
                        {
                            break;
                        }

                        if (j + 1 >= minWordLength)
                        {
                            string sub = text.Substring(start, j + 1);

                            if (hash.Contains(sub))
                            {
                                return true;
                            }
                        }
                    }
                }

                return false;
            }
        }
    

      

  • 相关阅读:
    BZOJ 4805: 欧拉函数求和 杜教筛
    BZOJ 2694: Lcm 莫比乌斯反演 + 积性函数 + 线性筛 + 卡常
    BZOJ 2693: jzptab 莫比乌斯反演 + 积性函数 + 筛法
    BZOJ 2154: Crash的数字表格 莫比乌斯反演
    BZOJ 3884: 上帝与集合的正确用法 扩展欧拉定理 + 快速幂
    BZOJ 3595: [Scoi2014]方伯伯的Oj Splay + 动态裂点 + 卡常
    GitHub的使用
    Selenium模块的安装
    关于__new__和__init__
    博客一键保存本地exe可视化界面文件
  • 原文地址:https://www.cnblogs.com/Aamir-Ye/p/4571630.html
Copyright © 2011-2022 走看看