zoukankan      html  css  js  c++  java
  • 聊天系统违禁词过滤

    网络游戏作为一种多人交互游戏,方便玩家在游戏中交流是很重要的。

    为了防止玩家在游戏中说违禁的东西,所有的聊天系统都需要对聊天信息进行过滤。

    本文描述了一种简单的基于DFA(字典树)的算法,用于过滤聊天内容中的违禁词。关于算法的运行复杂度:遍历长度为n的

    输入字符串,对扫描到的每个字符在当前节点的有序子节点数组中执行一次二分查找(子节点至多256个,代价不超过lgn量级),因此若每个字符只被扫描一次,复杂度为O(nlgn);匹配失败后会从下一个位置重新开始,所以严格的最坏情况还要再乘以最长违禁词的长度。

    因为编写这段代码的时间很短,所以代码写得有点丑陋,但已经足以描述其主要思想了:

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>
    #include <stdio.h>
    #include "wordfilter.h"
    
    
    /* One node of the forbidden-word trie; each node stands for a single byte of a word. */
    struct token{ 
        char   code;       // byte value this node matches
        struct token   **children;       // child nodes (getchild() binary-searches this array by code)
        uint32_t       children_size;  // number of entries in children
        uint8_t        end;          // non-zero when a forbidden word ends at this node
    };
    
    
    /*
     * Filter handle. tokarry is indexed by the first byte of a word (cast to
     * uint8_t) and holds the root token for words starting with that byte,
     * or NULL when no forbidden word starts with it.
     */
    typedef struct wordfilter{
        struct token * tokarry[256];
    }*wordfilter_t;
    
    /*
     * Create a new token for byte c and insert it into tok->children, keeping
     * the array ordered ascending by code (getchild() binary-searches it).
     *
     * The caller is expected to have checked that c is not already a child of
     * tok (addchild() does this).
     *
     * Returns the new child, or NULL if memory allocation fails; on failure
     * tok's existing children are left intact.
     */
    struct token *inserttoken(struct token *tok,char c)
    {
        struct token *child = calloc(1,sizeof(*child));
        if(!child)
            return NULL;
        child->code = c;

        /* realloc(NULL, n) behaves like malloc(n), so the first insertion needs no special case */
        struct token **grown = realloc(tok->children,((size_t)tok->children_size + 1) * sizeof(*grown));
        if(!grown){
            free(child);
            return NULL;
        }
        tok->children = grown;

        /* find the first slot whose code is greater than c */
        uint32_t pos = 0;
        while(pos < tok->children_size && grown[pos]->code <= c)
            ++pos;

        /*
         * Shift the whole tail right by one slot before storing the new child,
         * so that no existing child is overwritten or duplicated.
         */
        memmove(&grown[pos + 1],&grown[pos],(tok->children_size - pos) * sizeof(*grown));
        grown[pos] = child;
        tok->children_size++;
        return child;
    }
    
    /*
     * Binary-search tok's children for the one whose code equals c.
     * The search assumes the children are ordered ascending by code.
     * Returns the matching child, or NULL when there is none.
     */
    static struct token *getchild(struct token *tok,char c)
    {
        if(tok->children_size == 0)
            return NULL;

        int lo = 0;
        int hi = tok->children_size - 1;
        while(hi - lo > 0){
            int mid = lo + (hi - lo)/2;
            struct token *probe = tok->children[mid];
            if(probe->code == c)
                return probe;
            if(probe->code > c)
                hi = mid - 1;
            else
                lo = mid + 1;
        }
        /* the window has shrunk to at most one candidate, at index lo */
        return tok->children[lo]->code == c ? tok->children[lo] : NULL;
    }
    
    
    /* Return tok's child for byte c, creating it with inserttoken() when it does not exist yet. */
    static struct token *addchild(struct token *tok,char c){
        struct token *existing = getchild(tok,c);
        return existing ? existing : inserttoken(tok,c);
    }
    
    /*
     * Walk the trie from tok along str starting at index i and record in
     * *maxmatch the index just past the longest forbidden word seen.
     * *maxmatch is left untouched when nothing matches.
     */
    static void NextChar(struct token *tok,const char *str,int i,int *maxmatch)
    {
        struct token *node = tok;
        int idx = i;
        for(;;){
            if(str[idx] == 0)
                return;
            struct token *next = getchild(node,str[idx]);
            if(!next){
                /* dead end: the node reached so far may itself close a word */
                if(node->end)
                    *maxmatch = idx;
                return;
            }
            if(next->end)
                *maxmatch = idx + 1;
            node = next;
            ++idx;
        }
    }
    
    
    /*
     * Try to match a forbidden word that starts at str[*pos].
     * On a match, *pos is moved just past the longest matching word and 1 is
     * returned; otherwise *pos is advanced by one byte and 0 is returned.
     */
    static uint8_t processWord(wordfilter_t filter,const char *str,int *pos)
    {
        int start = *pos;
        struct token *root = filter->tokarry[(uint8_t)str[start]];
        if(!root){
            /* no forbidden word begins with this byte */
            *pos = start + 1;
            return 0;
        }

        int longest = 0;
        NextChar(root,str,start + 1,&longest);
        if(longest != 0){
            *pos = longest;
            return 1;
        }

        /* nothing longer matched; the single first byte may still be a word on its own */
        *pos = start + 1;
        return root->end ? 1 : 0;
    }
    
    /* Free tok and every token reachable through its children. */
    static void wordfilter_free_token_(struct token *tok)
    {
        if(!tok)
            return;
        for(uint32_t k = 0; k < tok->children_size; ++k)
            wordfilter_free_token_(tok->children[k]);
        free(tok->children);
        free(tok);
    }

    /*
     * Build a filter from a NULL-terminated array of forbidden words.
     *
     * forbidwords may be NULL, which yields an empty filter. Empty strings in
     * the list are skipped, since they can never match any input.
     *
     * Returns the new filter, or NULL if memory allocation fails; everything
     * built so far is released in that case.
     */
    wordfilter_t wordfilter_new(const char **forbidwords){
        wordfilter_t filter = calloc(1,sizeof(*filter));
        if(!filter)
            return NULL;
        if(!forbidwords)
            return filter;

        for(int i = 0; forbidwords[i] != NULL; ++i){
            const char *str = forbidwords[i];
            if(str[0] == 0)
                continue;

            /* the root for a word is selected by its first byte */
            uint8_t first = (uint8_t)str[0];
            struct token *tok = filter->tokarry[first];
            if(!tok){
                tok = calloc(1,sizeof(*tok));
                if(!tok)
                    goto fail;
                tok->code = str[0];
                filter->tokarry[first] = tok;
            }

            /* descend, creating one node per remaining byte as needed */
            for(size_t j = 1; str[j] != 0; ++j){
                tok = addchild(tok,str[j]);
                if(!tok)
                    goto fail;
            }
            tok->end = 1;
        }
        return filter;

    fail:
        for(size_t k = 0; k < sizeof(filter->tokarry)/sizeof(filter->tokarry[0]); ++k)
            wordfilter_free_token_(filter->tokarry[k]);
        free(filter);
        return NULL;
    }
    
    /*
     * Check str against the filter.
     * Returns 1 when str contains no forbidden word, 0 as soon as one is found.
     */
    uint8_t isvaildword(wordfilter_t filter,const char *str)
    {
        int len = strlen(str);
        int cursor = 0;
        while(cursor < len){
            /* processWord advances cursor itself on every call */
            if(processWord(filter,str,&cursor))
                return 0;
        }
        return 1;
    }
    
    /*
     * Return a copy of str in which every forbidden word is replaced by the
     * replace character, with each run of consecutive replace characters
     * collapsed to a single one.
     *
     * Note that replace characters already present in str take part in the
     * collapsing as well, because the masking is done in-band.
     *
     * The result is created with new_string(); if the scratch buffer cannot
     * be allocated, an empty string is returned rather than unfiltered text.
     */
    string_t wordfiltrate(wordfilter_t filter,const char *str,char replace){
        int size = strlen(str);
        char *tmp = calloc(1,size+1);
        if(!tmp)
            return new_string("");   /* fail closed: never hand back the unfiltered text */
        memcpy(tmp,str,size);

        /* pass 1: overwrite every matched word with the replace character */
        for(int i = 0; i < size;){
            int start = i;
            if(processWord(filter,str,&i))
                memset(tmp + start,replace,i - start);
        }

        /*
         * pass 2: compact in place, writing one replace character per run.
         * out never runs ahead of i, so unread input is never overwritten.
         */
        int out = 0;
        int in_run = 0;
        for(int i = 0; i < size; ++i){
            if(tmp[i] == replace){
                if(!in_run){
                    tmp[out++] = replace;
                    in_run = 1;
                }
            }else{
                tmp[out++] = tmp[i];
                in_run = 0;
            }
        }
        tmp[out] = 0;

        /* new_string() is assumed to copy its argument, since tmp is freed right after */
        string_t ret = new_string(tmp);
        free(tmp);
        return ret;
    }
  • 相关阅读:
    React入门实例
    【C语言】一些重要的知识点
    【C语言】字符串模块
    【C语言】指针模块
    贝尔曼福特算法
    dijkstra算法
    拓扑序列
    树和图的广度优先遍历
    树和图的深度优先遍历
    回溯剪枝,dfs,bfs
  • 原文地址:https://www.cnblogs.com/sniperHW/p/2607311.html
Copyright © 2011-2022 走看看