zoukankan      html  css  js  c++  java
  • PHP 搜索分词实现代码

    <?php 
    /**
     * Dictionary trie used for word segmentation.
     *
     * Every node holds one character (one ASCII byte or one multi-byte UTF-8
     * sequence); the root holds the empty string. A node whose $isEnd flag is
     * true terminates a dictionary word.
     *
     * @author: xiaojiang 2014-01-08
     */
    class Tree{
    
        /** @var string Character stored in this node ('' for the root). */
        public $w = '';
        /** @var Tree[] Child nodes, in insertion order. */
        public $subT = array();
        /** @var bool True when the path from the root to this node spells a whole word. */
        public $isEnd = false;
        
        /**
         * @param string $w     Character for this node; leave empty for the root.
         * @param bool   $isEnd Whether this node ends a word (ignored for an empty $w).
         */
        public function __construct($w= '' , $isEnd = false){
            // Compare with '' explicitly: empty('0') is true, which used to turn
            // the node for the character "0" into an anonymous, unfindable node.
            if($w !== '' && $w !== null){
                $this->w = $w;
                $this->isEnd = $isEnd;
            }
        }

        /**
         * Adds a word to the trie, one character per level.
         *
         * @param string $str UTF-8 encoded word; an empty string is ignored.
         * @return void
         */
        public function insert( $str ){
        
            $str = (string) $str;
            $len = strlen($str);
            if(!$len) return ;
            $scope = $this;
            $i = 0;
            while( $i < $len ){
                // Take exactly as many bytes as the lead byte announces, so
                // 2-, 3- and 4-byte characters are all kept intact.
                $size = self::charSize( $str[$i] );
                $scope = $scope->insertNode( substr($str, $i, $size) );
                $i += $size;
            }
            $scope->isEnd = true;
        }
        
        /**
         * Returns the child holding $w, creating it when it does not exist yet.
         *
         * @param string $w Single character.
         * @return Tree
         */
        private function insertNode(  $w ){
            $t = $this->hasTree( $w );
            if( !$t ){
                $t =  new Tree( $w );
                $this->subT[] = $t;
            }
            return $t;
        }
        
        /**
         * Looks up the direct child that stores the character $w.
         *
         * @param string $w Single character; non-string values never match.
         * @return Tree|false The child node, or false when there is none.
         */
        public function &hasTree($w){
            // The method returns by reference, so the result must live in a
            // variable; returning the literal false raises a notice.
            $found = false;
            foreach ($this->subT as $t){
                if($t->w === $w){
                    $found = $t;
                    break;
                }
            }
            return $found;
        }

        /**
         * Byte length of the UTF-8 character introduced by the given lead byte.
         *
         * @param string $byte First byte of the character.
         * @return int 1 to 4; invalid or stray bytes count as a single byte.
         */
        private static function charSize( $byte ){
            $code = ord( $byte );
            if( ($code & 0xE0) === 0xC0 ) return 2;
            if( ($code & 0xF0) === 0xE0 ) return 3;
            if( ($code & 0xF8) === 0xF0 ) return 4;
            return 1;
        }
    
    }
    
    
    /**
     * Character-indexed view of a UTF-8 string.
     *
     * The constructor splits the string into characters once, so callers can
     * address it by character position instead of byte offset.
     */
    class myStr{
        
        /** @var string The original string. */
        private $str = '';
        /** @var string[] The string split into UTF-8 characters. */
        private $arr = array();
        /** @var int Number of characters in $arr. */
        private $len = 0;

        /**
         * @param string $str UTF-8 encoded text.
         */
        public function __construct( $str){
            $str = (string) $str;
            $this->str = $str;
            $bytes = strlen($str);
            $i = 0;
            while ($i < $bytes){
                // Use the width announced by the lead byte rather than assuming
                // three bytes, so 2- and 4-byte characters are not torn apart.
                $size = self::charSize($str[$i]);
                $this->arr[] = substr($str, $i , $size);
                $i += $size;
            }
            $this->len = count($this->arr);
        }
        
        /**
         * Returns the character at the given position.
         *
         * @param int $idx Zero-based character index.
         * @return string|null The character, or null when $idx is out of range.
         */
        public function getIndex( $idx ){
            // Callers probe one past the end while scanning; answer with null
            // instead of triggering an undefined-offset error.
            return isset($this->arr[$idx]) ? $this->arr[$idx] : null;
        }
        
        /**
         * @return int Number of characters (not bytes) in the string.
         */
        public function getLength(){
            return $this->len;
        }

        /**
         * Byte length of the UTF-8 character introduced by the given lead byte.
         *
         * @param string $byte First byte of the character.
         * @return int 1 to 4; invalid or stray bytes count as a single byte.
         */
        private static function charSize( $byte ){
            $code = ord($byte);
            if (($code & 0xE0) === 0xC0) return 2;
            if (($code & 0xF0) === 0xE0) return 3;
            if (($code & 0xF8) === 0xF0) return 4;
            return 1;
        }
    }
    
    // Build the dictionary trie.
    $tIns = new Tree();
    $tIns->insert('中华');
    $tIns->insert('人民');
    $tIns->insert('共和国');
    $tIns->insert('baidu');
    
    // Text to segment, pre-split into characters.
    $strIns = new myStr("cc中华的人民共和国和中国啊啊www.baidua.com");
    $total = $strIns->getLength();
    
    // Try every character position as the start of a dictionary word and
    // print the longest word that begins there.
    for ($i = 0; $i < $total; $i++ ){
        
        $stIns = $tIns->hasTree( $strIns->getIndex($i) );
        if(!$stIns) continue;
        
        $path = '';   // characters walked so far along the trie
        $sw = '';     // longest complete word seen on that walk
        $j = $i;
        while ( $stIns ){
            $path .= $stIns->w;
            // Remember the word each time a terminal node is passed, so a
            // longer path that later fails does not discard a valid shorter word.
            if( $stIns->isEnd )
                $sw = $path;
            // Stop at the end of the text instead of reading past it.
            if( ++$j >= $total )
                break;
            $stIns = $stIns->hasTree( $strIns->getIndex( $j ) );
        }
        // Compare against '' so that a word such as "0" is still printed.
        if($sw !== '')
            echo $sw."<br>";
    }
    
    
    ?>

     输出:

    中华
    人民
    共和国
    baidu
  • 相关阅读:
    Flask框架总结
    spring boot 批量新增
    Visual Studio Code代码格式化Vue文件设置快捷键ctrl+alt+l
    Visual Studio Code安装插件!!!
    JavaScript:改变 HTML 内容
    使用JavaScript脚本在页面上显示输出
    将redis添加到linux系统服务
    虚拟机VMware下CentOS6.6安装教程图文详解
    CentOS-6.5下portal连接mysql失败,报permission denied
    2017-03-22、Linux同步当前服务器时间
  • 原文地址:https://www.cnblogs.com/glory-jzx/p/3509975.html
Copyright © 2011-2022 走看看