zoukankan      html  css  js  c++  java
  • 一个用PHP写的中文分词函数

    <?php

    class Segmentation {
    // Behaviour switches, changed through setLowercase() and setSegmentEnglish().
    // NOTE(review): the code that consumes these flags is not visible here - confirm their exact effect.
    var $options = array('lowercase' => TRUE,
    'segment_english' => FALSE);
    // Dictionary name taken from the header line by load(); stays 'Unknown' until a
    // dictionary is loaded or when the header carries no name.
    var $dict_name = 'Unknown';
    // Loaded dictionary words stored as array keys (word => 1); filled by load().
    var $dict_words = array();
    /**
     * Switch the 'lowercase' option on or off.
     *
     * @param mixed $value Any value; only its truthiness is used.
     * @return bool Always TRUE.
     */
    function setLowercase($value) {
        // Normalise whatever the caller passed into a strict boolean flag.
        $this->options['lowercase'] = (bool) $value;

        return TRUE;
    }
    /**
     * Switch the 'segment_english' option on or off.
     *
     * @param mixed $value Any value; only its truthiness is used.
     * @return bool Always TRUE.
     */
    function setSegmentEnglish($value) {
        // Normalise whatever the caller passed into a strict boolean flag.
        $this->options['segment_english'] = (bool) $value;

        return TRUE;
    }
    /**
     * Load a word dictionary from a text file into $this->dict_words.
     *
     * File layout as read by this method: the first line is a header holding
     * the dictionary type, optionally followed by a TAB and a dictionary name;
     * every following line holds one word. Only the type 'DICT_WORD_W' is
     * accepted. Words are added to any words already loaded.
     *
     * Note that $this->dict_name is updated from the header even when the
     * type check afterwards fails.
     *
     * @param string $dict_file Path of the dictionary file.
     * @return bool TRUE on success; FALSE when the file is missing, cannot be
     *              opened, is empty, or has an unsupported dictionary type.
     */
    function load($dict_file) {
        if (!file_exists($dict_file)) {
            return FALSE;
        }

        $fp = fopen($dict_file, 'r');
        if ($fp === FALSE) {
            return FALSE;
        }

        $header = fgets($fp, 1024);
        if ($header === FALSE) {
            // Empty file: release the handle before bailing out.
            fclose($fp);
            return FALSE;
        }

        // Header is "<type>" or "<type>TAB<name>". Trim first so a dangling
        // separator cannot produce a missing name element.
        $header = trim($header);
        if (strpos($header, "\t") !== FALSE) {
            list ($dict_type, $dict_name) = explode("\t", $header, 2);
        } else {
            $dict_type = $header;
            $dict_name = 'Unknown';
        }
        $this->dict_name = $dict_name;

        if ($dict_type !== 'DICT_WORD_W') {
            fclose($fp);
            return FALSE;
        }

        // Loop on the fgets() result rather than feof(): feof() only turns
        // TRUE after a failed read, which would register an empty word.
        // Reading whole lines also keeps long words from being cut apart.
        while (($line = fgets($fp)) !== FALSE) {
            $word = rtrim($line);
            if ($word !== '') {
                $this->dict_words[$word] = 1;
            }
        }

        fclose($fp);
        return TRUE;
    }
    /**
     * Return the name of the dictionary.
     *
     * @return string The value of $this->dict_name ('Unknown' by default).
     */
    function getDictName() {
        return $this->dict_name;
    }
    /**
     * Segment a (possibly multi-line) string.
     *
     * The input is split into lines and handed to _segmentLines().
     *
     * @param string $str Text to segment.
     * @return string|bool Result of _segmentLines(), or FALSE when no
     *                     dictionary words have been loaded.
     */
    function segmentString($str) {
        if (count($this->dict_words) === 0) {
            return FALSE;
        }

        // Split on the line-feed character; the previous delimiter was the
        // plain letter "n" (escape backslash lost), which cut words apart.
        return $this->_segmentLines(explode("\n", $str));
    }
    /**
     * Segment the contents of a file.
     *
     * The file is read into an array of lines and handed to _segmentLines().
     *
     * @param string $filename Path of the file to segment.
     * @return string|bool Result of _segmentLines(), or FALSE when no
     *                     dictionary words have been loaded or the file
     *                     cannot be read.
     */
    function segmentFile($filename) {
        if (count($this->dict_words) === 0) {
            return FALSE;
        }

        $lines = file($filename);
        if ($lines === FALSE) {
            // file() failed (missing or unreadable file): report failure
            // instead of passing FALSE on as if it were a list of lines.
            return FALSE;
        }

        return $this->_segmentLines($lines);
    }
    function _segmentLines($lines) {
    $contents_segmented = '';
    foreach ($lines as $line) {
    $contents_segmented .= $this->_segmentLine(rtrim($line)) . " n";
    }
    do {
    $contents_segmented = str_replace(' ', ' ', $contents_segmented);
    } while (strpos($contents_segmented, ' ') !== FALSE);
    return $contents_segmented;?>

  • 相关阅读:
    Arcgis silverlight4 Sublayerlist
    U盘不显示盘符
    Error: The spatial references do not match
    如何让你的SQL运行得更快
    Arcgis silverlight3 layerlist
    oracle客户端登陆
    Arcgis silverlight1 地图显示
    通过BAT文件部署windows服务
    在博客园安家了
    java中static作用详解
  • 原文地址:https://www.cnblogs.com/ymj0906/p/3003497.html
Copyright © 2011-2022 走看看