zoukankan html css js c++ java

python 检测文件编码等

参考：http://my.oschina.net/waterbear/blog/149852

chardet 模块可以用来检测文本的编码。

核心代码：

import chardet
# detect() is given the file content and returns a mapping; its 'encoding'
# entry is the guessed encoding name.
# NOTE(review): presumably `content` must be the raw bytes of the file, and
# the 'encoding' entry can be None when no guess is made -- confirm against
# the chardet documentation.
chardet.detect(content)['encoding']

将当前目录（含子目录）下的 Java 文件批量转码为 UTF-8：

#-*- coding: utf-8 -*-

import codecs
import os
import shutil
import re
import chardet

def convert_encoding(filename, target_encoding):
    """Re-encode ``filename`` in place as ``target_encoding``.

    The source encoding is guessed by ``chardet`` from the file's raw bytes.
    Before anything else, a copy of the file is saved next to it as
    ``filename + '.bak'``.

    Args:
        filename: path of the file to convert.
        target_encoding: codec name to re-encode the text with, e.g. 'utf-8'.

    Raises:
        ValueError: if chardet cannot guess an encoding. The file itself is
            left untouched in that case (the backup has already been made).
    """
    # Back up the original file.
    shutil.copyfile(filename, filename + '.bak')

    # Read raw bytes: detection and decoding must see the file unmodified,
    # and the handle is closed as soon as the content is in memory.
    with open(filename, 'rb') as src:
        raw = src.read()

    source_encoding = chardet.detect(raw)['encoding']
    print('%s %s' % (source_encoding, filename))
    if source_encoding is None:
        raise ValueError('cannot detect the encoding of %s' % filename)

    # Transcode completely in memory first, so a decode/encode error cannot
    # leave a half-written file behind.
    converted = raw.decode(source_encoding).encode(target_encoding)
    with open(filename, 'wb') as dst:
        dst.write(converted)

def main():
    """Convert every ``.java`` file under the current directory to UTF-8.

    The tree rooted at ``os.getcwd()`` is walked recursively; the extension
    match is case-insensitive. A file that fails to convert is reported on
    standard output together with the error, and the walk continues.
    """
    for root, dirs, files in os.walk(os.getcwd()):
        for f in files:
            if not f.lower().endswith('.java'):
                continue
            filename = os.path.join(root, f)
            try:
                convert_encoding(filename, 'utf-8')
            except Exception as e:
                # Best effort: one bad file must not stop the batch, but the
                # reason is printed so the failure can be diagnosed.
                print('%s: %r' % (filename, e))

def process_bak_files(action='restore'):
    """Restore or delete the ``.java.bak`` backups under the current directory.

    The tree rooted at ``os.getcwd()`` is walked recursively; the suffix
    match is case-insensitive. A backup that cannot be processed is reported
    on standard output together with the error, and the walk continues.

    Args:
        action: ``'restore'`` moves each backup back to the name it was
            copied from (the backup name minus the trailing ``.bak``);
            ``'clear'`` deletes each backup.

    Raises:
        ValueError: if ``action`` is neither ``'restore'`` nor ``'clear'``.
    """
    if action not in ('restore', 'clear'):
        raise ValueError('unknown action: %r' % (action,))

    for root, dirs, files in os.walk(os.getcwd()):
        for f in files:
            if not f.lower().endswith('.java.bak'):
                continue
            source = os.path.join(root, f)
            try:
                if action == 'restore':
                    # Strip only the '.bak' suffix so the restored name keeps
                    # its original case (e.g. 'Foo.JAVA.bak' -> 'Foo.JAVA').
                    shutil.move(source, source[:-len('.bak')])
                else:
                    os.remove(source)
            except Exception as e:
                # Best effort: report the failure and keep going.
                print('%s: %r' % (source, e))

if __name__ == '__main__':
    # Run the conversion when executed as a script. To restore or delete the
    # '.bak' backups afterwards, call process_bak_files instead, e.g.:
    # process_bak_files(action='clear')
    main()

另，参考：Python 的中文编码处理

http://in355hz.iteye.com/blog/1860787

import codecs
import os
import sys

# Show the encoding of the standard output stream.
print(sys.stdout.encoding)

# If stdout has no encoding, write through the charset named in LANG.
if sys.stdout.encoding is None:
    # LANG looks like 'en_US.UTF-8' or 'de_DE.ISO-8859-1@euro': keep only the
    # charset part, and fall back to UTF-8 when LANG is unset or has none.
    enc = os.environ.get('LANG', '').partition('.')[2].partition('@')[0] or 'utf-8'
    sys.stdout = codecs.getwriter(enc)(sys.stdout)  # replace sys.stdout

# Make sys.getdefaultencoding() return 'utf-8'. reload() and
# setdefaultencoding() only exist on Python 2, hence the version guard.
if sys.version_info[0] == 2:
    # reload(sys) also rebinds the standard streams to their startup values,
    # which would undo the stdout replacement above, so keep the current ones.
    streams = (sys.stdin, sys.stdout, sys.stderr)
    reload(sys)  # setdefaultencoding can only be called after a reload
    sys.stdin, sys.stdout, sys.stderr = streams
    sys.setdefaultencoding('utf-8')  # set 'utf-8'

查看全文

相关阅读:
转载：MyBatis获取插入记录的自增长字段值
 006---抽象类
 005---组合
 004---继承与派生
 003---属性查找和绑定方法
 002---类与对象
 001---面向对象和面向过程的区别
 017---Django的中间件解决跨域
 10---git安装
 007---归并排序

原文地址：https://www.cnblogs.com/sudawei/p/3480362.html