Python 的编码格式检测可以使用 chardet 库,
例如:
# Example: guess the character encoding of a downloaded page with chardet.
import urllib
from contextlib import closing

import chardet  # third-party package, not part of the standard library

try:
    urlopen = urllib.urlopen  # Python 2 location
except AttributeError:
    from urllib.request import urlopen  # Python 3 location

# Read the raw, undecoded bytes; closing() releases the connection on both
# Python 2 (where the response is not a context manager) and Python 3.
with closing(urlopen('http://www.google.cn/')) as response:
    rawdata = response.read()

# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(chardet.detect(rawdata))
# Output recorded in the original note:
# {'confidence': 0.98999999999999999, 'encoding': 'GB2312'}
# Conversion helpers between GBK / UTF-8 byte strings and unicode text.
# The usage examples call them through a module named `star`
# (presumably this file's module name -- confirm against the importing code).


def gbk2unicode(s):
    r"""Decode the GBK byte string *s* into unicode text.

    Bytes that are not valid GBK are dropped silently ('ignore').

    The original note states that command-line arguments on Windows are
    GBK-encoded (NOTE(review): holds for Python 2 on a GBK-locale system;
    verify for your environment):

        star.gbk2unicode(sys.argv[1]) + u'\u4e5f\u6709'
    """
    return s.decode('gbk', 'ignore')


def utf82unicode(s):
    r"""Decode the UTF-8 byte string *s* into unicode text.

    Bytes that are not valid UTF-8 are dropped silently ('ignore').

    The original note states that in a script declared ``# coding: utf-8``
    a string literal without the ``u`` prefix is a UTF-8 byte string
    (Python 2 semantics):

        star.utf82unicode('\xe6\x88\x91')
    """
    return s.decode('utf-8', 'ignore')


def unicode2gbk(s):
    r"""Encode the unicode text *s* as a GBK byte string.

    Encoding is strict: a character that GBK cannot represent raises
    UnicodeEncodeError.

        star.unicode2gbk(u'\u4e5f\u6709')
    """
    return s.encode('gbk')


def unicode2utf8(s):
    r"""Encode the unicode text *s* as a UTF-8 byte string.

        star.unicode2utf8(u'\u4e5f\u6709')
    """
    return s.encode('utf-8')


def gbk2utf8(s):
    r"""Re-encode the GBK byte string *s* as a UTF-8 byte string.

    Invalid GBK bytes are dropped silently while decoding.

        star.gbk2utf8(sys.argv[1]) + '\xe4\xb9\x9f\xe6\x9c\x89'
    """
    return s.decode('gbk', 'ignore').encode('utf-8')


def utf82gbk(s):
    """Re-encode the UTF-8 byte string *s* as a GBK byte string.

    Invalid UTF-8 bytes are dropped silently while decoding; the GBK
    encoding step is strict and raises UnicodeEncodeError for characters
    GBK cannot represent.
    """
    return s.decode('utf-8', 'ignore').encode('gbk')