zoukankan      html  css  js  c++  java
  • PHP切割汉字

    <?php
    /*
    A UTF-8 encoded character occupies 1 to 4 bytes (common CJK characters take 3 bytes).
    
    */
    /*--------------------------方法一截取中文字符串方法------------------------------*/
    /**
     * Cut a byte range out of a string in a double-byte encoding (e.g. GBK/GB2312)
     * without splitting a double-byte character.
     *
     * Every byte above 0xA0 is treated as the first byte of a two-byte character,
     * so this function is NOT suitable for UTF-8 text.
     *
     * Fix over the previous version: $start is now honoured. Previously the result
     * always began at byte 0 and simply covered $start + $len bytes.
     *
     * @param string $str   Source string (double-byte encoded).
     * @param int    $start Byte offset at which the result begins. A double-byte
     *                      character that begins before this offset is skipped whole.
     * @param int    $len   Number of bytes to cover starting at $start. A double-byte
     *                      character beginning on the last covered byte is kept whole,
     *                      so the result may be one byte longer than $len.
     * @return string The extracted piece.
     */
    function msubstr($str, $start, $len)
    {
        $str = (string) $str;
        $tmpstr = "";
        // Never scan past the real end of the string.
        $end = min($start + $len, strlen($str));
        for ($i = 0; $i < $end; $i++) {
            // ord() yields the byte value; > 0xA0 marks the lead byte of a 2-byte character.
            $width = ord($str[$i]) > 0xa0 ? 2 : 1;
            if ($i >= $start) {
                $tmpstr .= substr($str, $i, $width);
            }
            // Step over the trail byte of a double-byte character.
            $i += $width - 1;
        }
        return $tmpstr;
    }
    
    
    /*----------------------------第二种方法-----------------------------------*/
    //截取的是UTF-8字符串
    /**
     * Return the leading part of a UTF-8 string that fits into $len width units.
     *
     * An ASCII byte costs one unit; any multi-byte character costs two units.
     * A multi-byte character that no longer fits is dropped together with
     * everything after it.
     *
     * Fixes over the previous version:
     *  - `ord($tem_str > 127)` had a misplaced parenthesis, so the multi-byte test
     *    was evaluated on a boolean instead of on the byte value;
     *  - every non-ASCII character was assumed to be exactly 3 bytes long; the
     *    length is now derived from the lead byte (2, 3 or 4 bytes).
     *
     * @param string $str UTF-8 encoded source string.
     * @param int    $len Maximum width (ASCII = 1, multi-byte character = 2).
     * @return string The truncated string.
     */
    function utf_substr($str, $len)
    {
        $str = (string) $str;
        $total = strlen($str);
        $new_str = "";
        $pos = 0; // byte offset of the next unread character

        for ($i = 0; $i < $len && $pos < $total; $i++) {
            $lead = ord($str[$pos]);
            if ($lead > 127) {
                if ($lead >= 0xF0) {
                    $bytes = 4;
                } elseif ($lead >= 0xE0) {
                    $bytes = 3;
                } elseif ($lead >= 0xC0) {
                    $bytes = 2;
                } else {
                    // Stray continuation byte (malformed input): consume it on its own.
                    $bytes = 1;
                }
                // A multi-byte character occupies a second width unit.
                $i++;
                if ($i < $len) {
                    $new_str .= substr($str, $pos, $bytes);
                    $pos += $bytes;
                }
            } else {
                $new_str .= $str[$pos];
                $pos++;
            }
        }
        return $new_str;
    }
    
    
    /*-------------------------------------第三种方法(UTF-8)--------------------------------*/
    /**
     * Shorten a UTF-8 string to roughly $length display columns and append "..."
     * when characters had to be dropped.
     *
     * The string is split into well-formed UTF-8 characters (bytes that do not form
     * a valid sequence are discarded). ASCII characters count as one column,
     * multi-byte characters as two. Cutting happens as soon as the accumulated
     * width exceeds $length - 3, which leaves room for the ellipsis.
     *
     * Fixes over the previous version:
     *  - the byte escapes in the pattern had lost their backslashes (`x01-x7f`
     *    instead of `\x01-\x7f`) and the 2-byte alternative was split by a stray
     *    `|`, so multi-byte characters were never matched;
     *  - "..." was appended even when the cut fell exactly on the last character,
     *    i.e. when nothing was actually removed.
     *
     * @param string $string UTF-8 encoded source string.
     * @param int    $length Target width in columns, including the ellipsis.
     * @return string The shortened string.
     */
    function cutstr($string, $length)
    {
        // Single-quoted so that the \x.. escapes reach PCRE untouched.
        preg_match_all(
            '/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/',
            $string,
            $info
        );
        $chars = $info[0];
        $last = count($chars) - 1;

        $wordscut = "";
        $j = 0; // accumulated display width
        foreach ($chars as $idx => $char) {
            $wordscut .= $char;
            $j += ord($char) > 127 ? 2 : 1;
            if ($j > $length - 3) {
                // Only mark the result as truncated when something follows.
                return $idx < $last ? $wordscut . "..." : $wordscut;
            }
        }
        return $wordscut;
    }
    
    // Demo: shorten a mixed ASCII/Chinese sample to a width of 10 and print the result.
    $string = '312哈哈,这个组合很难切割哦';
    print cutstr($string, 10);
    
    
    /*---------------------------------Previously used snippet: extract the third character of a string------------------------------*/
    // $name1 = mysql_result($my_rst,0,"name");
    // $name = preg_match("/([1-9][0-9]+)/",$name1,$r);
    // $name = $r[0];
    // if($name == ""){
    // $name=preg_replace('#^(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,2}'.
    // '((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,1}).*#s',
    // '$1',$name1);
    // }
    
    /*--------------------------------------------第四种方法(UTF-8)---------------------------------------------*/
    /**
     * Truncate a UTF-8 string to $cutlength "full-width" units and append "..."
     * when part of the string was left out.
     *
     * Width accounting:
     *  - a multi-byte character (2, 3 or 4 bytes) counts as 1 unit;
     *  - an ASCII upper-case letter A-Z counts as 1 unit (it renders wide);
     *  - every other single byte (lower-case letters, digits, half-width
     *    punctuation, ...) counts as 0.5 unit.
     *
     * Fixes over the previous version:
     *  - the loop ran while `$i <= $str_length`, reading one position past the end;
     *  - 4-byte characters (lead byte >= 0xF0) were cut after 3 bytes;
     *  - the ellipsis was decided by comparing the BYTE length with $cutlength, so
     *    strings that fit completely still received "...". It is now appended only
     *    when unread bytes remain.
     *
     * @param string    $sourcestr UTF-8 encoded source string.
     * @param int|float $cutlength Maximum width in full-width units.
     * @return string The truncated string.
     */
    function cut_str($sourcestr, $cutlength)
    {
        $sourcestr = (string) $sourcestr;
        $returnstr = '';
        $i = 0; // byte offset
        $n = 0; // accumulated width
        $str_length = strlen($sourcestr); // length in bytes

        while ($n < $cutlength && $i < $str_length) {
            $ascnum = ord($sourcestr[$i]); // value of the byte at offset $i
            if ($ascnum >= 240) {
                $bytes = 4;
                $n++;
            } elseif ($ascnum >= 224) {
                $bytes = 3;
                $n++;
            } elseif ($ascnum >= 192) {
                $bytes = 2;
                $n++;
            } elseif ($ascnum >= 65 && $ascnum <= 90) {
                // Upper-case letters are counted as a full unit for visual balance.
                $bytes = 1;
                $n++;
            } else {
                // Lower-case letters, half-width punctuation etc. are half a unit wide.
                $bytes = 1;
                $n += 0.5;
            }
            $returnstr .= substr($sourcestr, $i, $bytes);
            $i += $bytes;
        }

        if ($i < $str_length) {
            $returnstr .= "..."; // something was cut off
        }
        return $returnstr;
    }
    
    
    /*--------------------第五种方法(UTF-8)---------------------------------------------*/
    
    /**
     * Entity-aware, multi-byte-safe substring for UTF-8 text.
     *
     * The string is scanned token by token starting at byte offset $start. A token is
     * one of: the named entities `&lt;` `&gt;` `&amp;` `&quot;`, a numeric entity
     * (`&#123;`, trailing `;` optional), a multi-byte character, or a single byte.
     * Tokens are never split.
     *
     * Counting modes:
     *  - $magic = true : $len is a width in full-width units (2 half units each).
     *    Named entities and ordinary ASCII bytes cost 1 half unit; numeric entities,
     *    multi-byte characters and the letters A-Z cost 2 half units. When a numeric
     *    entity or multi-byte character overshoots the budget by one half unit it is
     *    dropped again; an A-Z letter that overshoots is kept.
     *  - $magic = false: $len is a plain count of tokens.
     *
     * If $start points into the middle of a multi-byte character it is moved back to
     * that character's lead byte. If the whole string is no longer than $len bytes,
     * plain substr($title, $start, $len) is returned.
     *
     * Fixes over the previous version:
     *  - the numeric-entity pattern had lost its backslash (`d+` instead of `\d+`)
     *    and was not anchored, so entities were never recognised at the current
     *    position (and an entity further along could be matched by mistake);
     *  - the 2-byte branch assigned `$ctype = 2` instead of `$cstep = 2`, so a 2-byte
     *    character that overshot the budget was not removed again;
     *  - an `&` that does not start a recognised entity was left out of the byte
     *    count, which could make the result end in the middle of a later multi-byte
     *    character; it is now counted like any other ASCII byte.
     *
     * @param string     $title Source string (UTF-8, may contain HTML entities).
     * @param int        $start Byte offset to start from.
     * @param int|string $len   Width (magic) or token count (non-magic); "" means
     *                          "up to the end of the string".
     * @param bool       $magic Selects the counting mode described above.
     * @return string The extracted piece.
     */
    function FSubstr($title, $start, $len = "", $magic = true)
    {
        $total = strlen($title);
        if ($len == "") $len = $total;

        // Move $start back to the lead byte when it points at a continuation byte.
        if ($start != 0) {
            $startv = ord(substr($title, $start, 1));
            if ($startv >= 128 && $startv < 192) {
                for ($i = $start - 1; $i > 0; $i--) {
                    if (ord(substr($title, $i, 1)) >= 192) break;
                }
                $start = $i;
            }
        }

        if ($total <= $len) return substr($title, $start, $len);

        $namedEntities = ["&lt;", "&gt;", "&amp;", "&quot;"];

        $alen = 0;    // tokens costing one half unit
        $blen = 0;    // tokens costing two half units
        $realnum = 0; // tokens seen (used when $magic is off)
        $length = 0;  // bytes to return

        for ($i = $start; $i < $total; $i += $cstep) {
            $cur = substr($title, $i, 1);
            $code = ord($cur);

            $cstep = 1;       // bytes occupied by the current token
            $counted = true;  // whether the token counts as a character at all
            $wide = false;    // whether it costs two half units
            $ctype = 0;       // 1 => token is dropped again if it overshoots

            if ($cur == "&") {
                $isNamed = false;
                foreach ($namedEntities as $entity) {
                    if (substr($title, $i, strlen($entity)) === $entity) {
                        $cstep = strlen($entity);
                        $isNamed = true;
                        break;
                    }
                }
                // \G anchors the match at the offset passed as the fifth argument.
                if (!$isNamed && preg_match('/\G&#\d+;?/', $title, $match, 0, $i)) {
                    $cstep = strlen($match[0]);
                    $wide = true;
                    $ctype = 1;
                }
                // Otherwise: a bare "&", handled as a normal one-byte narrow character.
            } elseif ($code >= 192) {
                // Lead byte of a multi-byte sequence; 5- and 6-byte forms are the
                // legacy UTF-8 encodings the original code also accepted.
                if ($code >= 252) {
                    $cstep = 6;
                } elseif ($code >= 248) {
                    $cstep = 5;
                } elseif ($code >= 240) {
                    $cstep = 4;
                } elseif ($code >= 224) {
                    $cstep = 3;
                } else {
                    $cstep = 2;
                }
                $wide = true;
                $ctype = 1;
            } elseif ($code >= 128) {
                // Stray continuation byte: keep the byte but do not count it.
                $counted = false;
            } else {
                // Plain ASCII; upper-case letters are treated as wide.
                $wide = ($code >= 65 && $code <= 90);
            }

            $length += $cstep;
            if ($counted) {
                $realnum++;
                if ($wide) {
                    $blen++;
                } else {
                    $alen++;
                }
            }

            if ($magic) {
                $used = $blen * 2 + $alen;
                if ($used == ($len * 2)) break;
                if ($used == ($len * 2) + 1) {
                    // Overshot by half a unit: give back a droppable wide token.
                    if ($ctype == 1) {
                        $length -= $cstep;
                    }
                    break;
                }
            } else {
                if ($realnum == $len) break;
            }
        }

        return substr($title, $start, $length);
    }
    /**
     * Character-based substring for UTF-8 text implemented with a single regex.
     *
     * Skips the first $from characters and returns up to $len characters after
     * them. A "character" is one ASCII byte, or a lead byte (0xC0-0xFF) followed
     * by one or more continuation bytes (0x80-0xBF). Scanning stops at the first
     * byte that fits neither form, so malformed input yields a shorter result.
     *
     * Fixes over the previous version:
     *  - the byte escapes had lost their backslashes (`x00-x7F` instead of
     *    `\x00-\x7F`), so non-ASCII input produced an empty string;
     *  - $from and $len are cast to non-negative integers before being placed into
     *    the pattern, so arbitrary text can no longer be injected into the regex.
     *
     * Note: PCRE limits a repetition count to 65535; larger $from/$len values make
     * the pattern invalid and preg_replace() return null.
     *
     * @param string $str  UTF-8 encoded source string.
     * @param int    $from Number of characters to skip.
     * @param int    $len  Maximum number of characters to return.
     * @return string|null The extracted piece, or null if the regex failed.
     */
    function utf8Substr($str, $from, $len)
    {
        $from = max(0, (int) $from);
        $len = max(0, (int) $len);

        // One UTF-8 character; single-quoted so that \x.. is interpreted by PCRE.
        $char = '(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+)';

        return preg_replace(
            '#^' . $char . '{0,' . $from . '}' .
            '(' . $char . '{0,' . $len . '}).*#s',
            '$1',
            $str
        );
    }
    
    // Demo: keep the first 15 characters of a mixed Chinese/ASCII sample and print them.
    $title = utf8Substr('你哈珀niad1纳斯达wop asdni你爱谁都没阿斯顿撒旦12ccs- sd', 0, 15);

    print $title;
    
    
    ?>
  • 相关阅读:
    博客
    Windows Live Writer
    VirtualBox
    Linux dd
    Nginx与tomcat组合的简单使用
    压力测试之badboy和Jmeter的简单使用方法
    WebGIS中基于控制点库进行SHP数据坐标转换的一种查询优化策略
    浅谈利用SQLite存储离散瓦片的思路和实现方法
    常见ArcGIS操作(以10.0为例)
    (二十一)WebGIS中鹰眼的实现思路
  • 原文地址:https://www.cnblogs.com/isuben/p/8126628.html
Copyright © 2011-2022 走看看