今天有个朋友问我如何无乱码地截取中文字符串,我翻了翻以前写的代码,现在贴出来,供大家参考。
/**
 * Extract a run of characters from a multi-byte string without cutting a
 * character in half.
 *
 * The string is split into characters with a byte-oriented regular expression
 * chosen by $encodeType; $start and $length are then applied to that list of
 * characters (not to bytes). Bytes that do not match any alternative of the
 * selected pattern are silently skipped.
 *
 * @param string $str        Input string, already encoded in $encodeType.
 * @param string $encodeType One of 'utf-8', 'gb2312', 'gbk', 'big5' (case-insensitive).
 * @param int    $start      Zero-based character offset (negative values follow array_slice() rules).
 * @param int    $length     Maximum number of characters to return.
 * @param bool   $hasSuffix  When true, $suffix is always appended to the result,
 *                           whether or not anything was actually cut off.
 * @param string $suffix     Text appended when $hasSuffix is true.
 *
 * @return string|false The extracted characters, or false when $encodeType is not supported.
 */
function msubstr($str, $encodeType = 'utf-8', $start = 0, $length = 20, $hasSuffix = false, $suffix = '。。。')
{
    // Single-quoted on purpose: the \xHH escapes are interpreted by PCRE itself,
    // which matches byte-wise because the "u" modifier is not set.
    $patterns = array(
        'utf-8'  => '/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xff][\x80-\xbf]{3}/',
        // NOTE(review): lead bytes 0xB0-0xF7 only, so double-byte characters with
        // other lead bytes are skipped rather than returned.
        'gb2312' => '/[\x01-\x7f]|[\xb0-\xf7][\xa0-\xfe]/',
        'gbk'    => '/[\x01-\x7f]|[\x81-\xfe][\x40-\xfe]/',
        // The trail-byte class was previously written as "\xa1-\xfe]" (missing "["),
        // which made every Big5 character with a trail byte in 0xA1-0xFE unmatched.
        'big5'   => '/[\x01-\x7f]|[\x81-\xfe](?:[\x40-\x7e]|[\xa1-\xfe])/',
    );

    // Accept 'UTF-8', 'GBK', ... as well as the lowercase spellings.
    $key = strtolower((string) $encodeType);
    if (!isset($patterns[$key])) {
        return false;
    }

    // $match[0] becomes the list of whole characters found in $str.
    preg_match_all($patterns[$key], (string) $str, $match);
    $slice = implode('', array_slice($match[0], $start, $length));

    return $hasSuffix ? $slice . $suffix : $slice;
}

// Demo: print the first 20 characters of a UTF-8 string.
$str = '我们啥呢好的的司法考试多拉风了的司法考试多拉风了的司法考试多拉风了司法考试多拉风了';
// Was 'utf-3', which is not a supported encoding and made msubstr() return false.
$encodeType = 'utf-8';
echo msubstr($str, $encodeType);