zoukankan      html  css  js  c++  java
  • PHP违禁词敏感词 全站文件扫描

    全站违禁词扫描下载地址

    https://files.cnblogs.com/files/kingchou/%E8%BF%9D%E7%A6%81%E8%AF%8D%E7%B1%BB%E6%96%87%E4%BB%B6.rar

    全站扫描违禁词代码

           require_once ROOT_PATH."/Banned.php";   
           $banned=new Banned();
           $banned->write_html=true;
           $banned->write_log=true;
           $banned->check_type="file";
    
           $banned->checkFileAll();
    

      

    类文件

    <?php
    date_default_timezone_set('Asia/Shanghai');
    /*error_reporting(E_WARNING);*/
    /**
     * 违禁词:
     * Created by PhpStorm.
     * Author: zhouzj -周宗君
     * Date: 2017/11/17 15:52
     */
    
    class Banned
    {
        private $data;//数据
        private $match_banned;//违禁词规则
        private $match_mingan;//敏感词规则
        private $match_field;//字符类型规则
        private $finish_path=array();//已完成的路径
        private $finish_table=array();//已完成的表
        private $logtime;
        private  $path;
        private  $check_sub_dir;
        private  $check_type;
        private  $banned_from;
        private  $clean;
        private  $write_html;
        private  $write_log;
        private  $fornum;
        private  $bannedword;
        public   $document_root;
        public $_config=array(
            "path"=>'',
            "check_sub_dir"=>array('web'),
            'check_type'=>'file',
            'banned_from'=>'table',
            'clean'=>false,
            'fornum'=>20,
            'write_html'=>false,
            'write_log'=>false,
            'bannedword'=>true,);
    
    
        /**
         *初始化方法
         */
        public function __construct($config=array())
        {
    
            if(!empty($config)) {
                $this->_config = array_merge($this->_config, $config);
            }
            $this->BannedInit();
        }
    
        /**
         * 设置key
         * @param $key
         * @param $value
         */
        public function __set($key, $value)
        {
            $this->_config[$key] = $value;
        }
    
        /**
         * 读取key
         * @param $key
         * @return mixed
         */
        public function __get($key)
        {
            return $this->_config[$key];
        }
    
        /**
         * 初始化违禁词和敏感词
         */
        private function BannedInit(){
            $this->logtime=date("YmdHis");
            $this->document_root=$_SERVER['DOCUMENT_ROOT'];
    
             //初始化加载违禁词 否则默认读取文件中的违禁词
          
    		//敏感词正则规则
    		$mingan_words= $this->getMinganWords();
    		$this->match_mingan=$this->generateRegularExpression($mingan_words);
    
    		//违禁词正则规则
    		$banned_words= $this->getBannedWords();
    		$this->match_banned=$this->generateRegularExpression($banned_words);
       
        }
    
        /**
         * 检测所有内容
         */
        public function checkAll(){
              //判断类型是数据库
             if($this->_config['check_type']=="file"){
               return  $this->checkFileAll();//检测文件
             }
        }
    
        
    
        /**
         * 检查文件
         */
        public function checkFileAll(){
            $path=$this->_config['path'];
            if(!$path){
                $path=$this->document_root."";
            }
    
            $laststr= substr($path, -1);
            if($laststr!="/"){
                $path.="/";
            }
            $dir_list=  $this->getDir($path);
            //判断是否包含子目录
            if(count($dir_list)>0){
                foreach ($dir_list as $dirkey=>$dirvalue){
                    if(empty($dirvalue)){
                        continue;
                    }
                    //判断是否是允许检测的子目录
                  //  if(in_array($dirvalue,$this->_config['check_sub_dir'])){
                        $subpath=$path.$dirvalue.'/';
    
                        $this->check_sub_dir($subpath);
                 //   }
                }
            }
            return true;
        }
    
        
        /**
         * 检测单条内容
         */
        public function check($content){
          
            if($this->_config['check_type']=="file"){
                return  $this->check_file($content);//检测文件
            }
        }
    
        
        /**
         * 检测单条文本内容
         * @param $content
         */
        public function check_text($content){
            $res_mingan=array();$res_banned=array();
            $this->__check_content($content,$res_mingan,$res_banned);
            $data['mingan']=$res_mingan;
            $data['banned']=$res_banned;
            return $data;
        }
    
        /**
         * 检测单条文件内容
         * @param $content
         */
        public function check_file($content){
           $res=  $this->__check_file($content);
           $this->set_finish_path($content);
           return $res;
        }
    
    
        /**
         * 迭代检查子文件目录
         * @param $path
         */
        function check_sub_dir($path){
            $file_list=  $this->getFile($path);//获取文件目录
            if(count($file_list)>0){
                foreach ($file_list as $filekey=>$filevalue){
                    if(empty($filevalue))
                        continue;
                    $this->__check_file($filevalue);//执行检查文件
                }
            }
            $dir_list=  $this->getDir($path);//获取文件夹目录
            if(count($dir_list)>0){
                foreach ($dir_list as $dirkey=>$dirvalue){
                    if(empty($dirvalue)){
                        continue;
                    }
                    $subpath=$path.$dirvalue.'/';
                    $this->check_sub_dir($subpath);
                }
            }
        }
    
        /**
         * 验证单个文件
         * @param $filepath
         */
        function __check_file($filepath){
            //判断文件是否已经完成检查,如果已经完成则不需要检查
            if(!$filepath) return;
    
            if(in_array($filepath,$this->finish_path))
                return;
    
            if(!file_exists($filepath)) return;
    
            if(stripos($filepath,"mingan_words.txt")>0 || stripos($filepath,"banned_words.txt")>0 ){
                return ;
            }
            //判断文件如果是 违禁词或敏感词文件则跳过不处理
            $content =  file_get_contents($filepath);
            $res_mingan=array();$res_banned=array();
            $this->__check_content($content,$res_mingan,$res_banned);
            $data=array();
            if($res_mingan || $res_banned){//如果有敏感词或违禁词则写日志
    
                $this->write_log($filepath,$res_mingan,$res_banned);
                $this->write_html($filepath,$res_mingan,$res_banned);
                $data['mingan']=$res_mingan;
                $data['banned']=$res_banned;
            }
            //执行保存文件路径
            return $data;
    
        }
    
        /**
         * 检查内容
         * @param $content
         * @param $res_mingan
         * @param $res_banned
         */
        private function __check_content($content,&$res_mingan,&$res_banned){
            //检查敏感词
            $res_mingan=$this->check_words($this->match_mingan,$content);
    
            //检查违禁词
            $res_banned=$this->check_words($this->match_banned,$content);
        }
    
        /**
         * 检查敏感词
         * @param $banned
         * @param $string
         * @return bool|string
         */
        private function check_words($banned,$string)
        {    $match_banned=array();
            //循环查出所有敏感词
    
            $new_banned=strtolower($banned);
            $i=0;
            do{
                $matches=null;
                if (!empty($new_banned) && preg_match($new_banned, $string, $matches)) {
                    $isempyt=empty($matches[0]);
                    if(!$isempyt){
                        $match_banned = array_merge($match_banned, $matches);
                        $matches_str=strtolower($this->generateRegularExpressionString($matches[0]));
                        $new_banned=str_replace("|".$matches_str."|","|",$new_banned);
                        $new_banned=str_replace("/".$matches_str."|","/",$new_banned);
                        $new_banned=str_replace("|".$matches_str."/","/",$new_banned);
                    }
                }
                $i++;
                if($i>$this->_config['fornum']){
                    $isempyt=true;
                    break;
                }
            }while(count($matches)>0 && !$isempyt);
    
            //查出敏感词
            if($match_banned){
                return $match_banned;
            }
            //没有查出敏感词
            return array();
        }
        /**
         * @describe 生成正则表达式
         * @param array $words
         * @return string
         */
        private function generateRegularExpression($words)
        {
            $regular = implode('|', array_map('preg_quote', $words));
            return "/$regular/i";
        }
        /**
         * @describe 生成正则表达式
         * @param array $words
         * @return string
         */
        private function generateRegularExpressionString($string){
              $str_arr[0]=$string;
              $str_new_arr=  array_map('preg_quote', $str_arr);
              return $str_new_arr[0];
        }
    
       
        /**
         * 写日志
         * @param $path
         * @param $content
         */
       private function write_log($location,$contentarr,$weijinciarr){
            if($this->_config['write_log']) {
                if (!$contentarr && !$weijinciarr) {
                    return;
                }
                $content = $location;
                if (count($contentarr) > 0) {
                    $content .= "," . count($contentarr) . "," . implode('|', $contentarr);
                } else {
                    $content .= ",,";
                }
                if (count($weijinciarr) > 0) {
                    $content .= "," . count($weijinciarr) . "," . implode('|', $weijinciarr);
                } else {
                    $content .= ",,";
                }
                $content .= "
    ";
                $filename =$this->document_root."/logs/file" . $this->logtime . "/file_bannwords.csv";
                /* 文件日志路径 */
            //    $file = './' . $filename;
                $file = $filename;
                if (!file_exists($file)) {
                    $pathdir = dirname($file);
                    if (!is_dir($pathdir)) {
                        mkdir($pathdir, 0775, true);
                    }
                    $content_title = "位置,敏感词数量,敏感词,违禁词数量,违禁词" . "
    ";
                    error_log(iconv('UTF-8', 'GB2312', $content_title), 3, $file);
                }
                error_log(iconv('UTF-8', 'GB2312', $content), 3, $file);
            }
        }
    
        /**
         * 打印到页面上
         * @param $filepath
         * @param $res_mingan
         * @param $res_banned
         */
       private function write_html($location,$res_mingan,$res_banned){
            if($this->_config['write_html']){
                print_r(iconv('GB2312','UTF-8',$location));
                if($res_mingan){
                    print_r("  <font color='red'>敏感词(".count($res_mingan)."):</font>".implode('|',$res_mingan));
                }
                if($res_banned){
                    print_r("  <font color='red'>违禁词(".count($res_banned)."):</font>".implode('|',$res_banned));
                }
                echo "<br>";
            }
        }
        /**
         * 保存已完成文件
         * @param $path
         */
       private function set_finish_path($path){
            if(!$path){
                return;
            }
            $content =$path. "
    ";
           $filename=$this->document_root."/logs/file" . $this->logtime . "/banned_finish_path.txt";
            /* 文件日志路径 */
           // $file ='./' . $filename;
           $file = $filename;
            if (!file_exists($file)) {
                mkdir(dirname($file), 0775, true);
            }
            error_log(iconv('GB2312','UTF-8',$content), 3, $file);
        }
    
    
        //重置已完成文件
    /*    function clean_finish_file(){
            $filename=$this->document_root."/logs/banned_finish_path.txt";
            file_put_contents($filename,'');
        }
    */
    
    
        //获取文件目录列表,该方法返回数组
        private  function getDir($dir) {
            $dirArray[]=NULL;
            if (is_dir($dir)) {
                try{
                    if (false != ($handle = @opendir($dir))) {
                        $i = 0;
                        while (false !== ($file = @readdir($handle))) {
                            //去掉"“.”、“..”以及带“.xxx”后缀的文件
                            if ($file != "." && $file != ".." && !strpos("*" . $file, ".")) {
                                $dirArray[$i] = $file;
                                $i++;
                            }
                        }
                        //关闭句柄
                        @closedir($handle);
                    }
                }catch (Exception $ex){
    
                }
            }
            return $dirArray;
        }
    
        //获取文件列表
        private  function getFile($dir) {
            $fileArray[]=NULL;
            if (false != ($handle = @opendir ( $dir ))) {
                $i=0;
                while ( false !== ($file = @readdir ( $handle )) ) {
                    //去掉"“.”、“..”以及带“.xxx”后缀的文件
                    if ($file != "." && $file != ".." && (strpos($file,".php") || strpos($file,".html"))) {
                        $fileArray[$i]=$dir.$file;
                        if($i==1000){//当同一个文件下超出1000个文件则跳出循环
                            break;
                        }
                        $i++;
                    }
                }
                //关闭句柄
                @closedir ( $handle );
            }
            return $fileArray;
        }
        //获取敏感词文件
        private   function getMinganWords(){
    
            $shehuangwords=file_get_contents($this->document_root."/words/forbidden.txt");
           // $shehuangwords=iconv("GBK","UTF-8",$shehuangwords);
            $shehuangword_arr=explode("
    ",$shehuangwords);
            return $shehuangword_arr;
        }
        //获取违禁词文件
        private  function getBannedWords(){
            if($this->_config['bannedword']){
                $guanggaowords=file_get_contents($this->document_root."/words/banned.txt");
               // $guanggaowords=iconv("GBK","UTF-8",$guanggaowords);
                $guanggaowords_arr=explode("
    ",$guanggaowords);
                return $guanggaowords_arr;
            }else{
                return array();
            }
        }
    
    }
    

      

  • 相关阅读:
    86. Partition List
    2. Add Two Numbers
    55. Jump Game
    70. Climbing Stairs
    53. Maximum Subarray
    64. Minimum Path Sum
    122. Best Time to Buy and Sell Stock II
    以场景为中心的产品设计方法
    那些产品经理犯过最大的错
    Axure教程:如何使用动态面板?动态面板功能详解
  • 原文地址:https://www.cnblogs.com/kingchou/p/7943304.html
Copyright © 2011-2022 走看看