核心用到的是mb_convert_encoding函数,示例代码如下:
<?php
header("Content-type: text/html; charset=utf-8");

/**
 * Detect the encoding of a string by round-tripping it through UTF-32.
 *
 * Each candidate encoding is tried in turn: the input is converted from the
 * candidate to UTF-32 and back again. The first candidate for which the
 * result is byte-identical to the input is reported as the encoding.
 *
 * @param string $str Raw bytes to inspect.
 * @return string|false One of 'ASCII', 'UTF-8' or 'GBK', or false when no
 *                      candidate reproduces the input exactly.
 */
function get_encoding($str = "")
{
    // Order matters: pure ASCII text is also valid UTF-8 and GBK, so the
    // most restrictive candidate is tested first.
    $encodings = ['ASCII', 'UTF-8', 'GBK'];

    foreach ($encodings as $encoding) {
        $round_trip = mb_convert_encoding(
            mb_convert_encoding($str, "UTF-32", $encoding),
            $encoding,
            "UTF-32"
        );
        if ($str === $round_trip) {
            return $encoding;
        }
    }

    return false;
}

/**
 * Check whether a string starts with the UTF-8 byte order mark (EF BB BF).
 *
 * @param string $str Raw bytes to inspect.
 * @return bool True when the first three bytes are the UTF-8 BOM.
 */
function utf8_has_bom($str)
{
    $chars = substr($str, 0, 3);
    $bom = chr(0xEF) . chr(0xBB) . chr(0xBF);

    return $chars === $bom;
}

// Sample input file. Alternative sample files used while experimenting:
// $filename = 'C:\Users\Administrator\Desktop\test.txt';
// $filename = 'C:\Users\Administrator\Desktop\ansi.txt';
// $filename = 'C:\Users\Administrator\Desktop\Unicode.txt';
$filename = 'C:\Users\Administrator\Desktop\my.txt';

// Read the file; treat an unreadable path as a failed read instead of
// letting a false value flow into the encoding functions below.
$str = is_readable($filename) ? file_get_contents($filename) : false;

if ($str === false) {
    echo "Unable to read file: {$filename}";
} else {
    // Detect the encoding.
    $encode = get_encoding($str);
    var_dump($encode);

    if ($encode === false) {
        // No candidate matched, so there is no source encoding to convert from.
        echo "Unable to detect the encoding of: {$filename}";
    } else {
        // Convert to UTF-8 unless the content already is UTF-8.
        if ($encode !== 'UTF-8') {
            $str = mb_convert_encoding($str, "UTF-8", $encode);
        }
        var_dump($str);
    }
}
参考文档:https://gist.github.com/welefen/7746175