# coding=utf-8 import os import chardet import codecs # 批量转换文件夹中的index.shtml为utf-8编码 def run(): # 第一步,读取所有的子文件夹,形成地址列表 all_child_dir = get_all_child_dir("D:\2") # 第二步,判断路径下的index.shtml是否存在,如果存在加入新的index.shtml列表 index_shtml = get_all_index_shtml(all_child_dir) # 第三步 自动判断index.shtml文件的编码,如果为gb2312,加入新列表 gb2312_list = get_all_gb2312(index_shtml) # 第四步 转换gb2312的文件列表为utf-8 convert_to_utf8(gb2312_list) def get_all_child_dir(path): dir_list = [] # 判断路径是否存在 if (os.path.exists(path)): print("该母路径存在") # 获取该目录下的所有文件或文件夹目录 files = os.listdir(path) for file in files: # 得到该文件夹下所有子目录的路径 m = os.path.join(path, file) # 判断是否为文件夹 if (os.path.isdir(m)): dir_list.append(m) print("所有列表如下") print(dir_list) return dir_list def get_all_index_shtml(all_child_dir): index_shtml = [] for i in all_child_dir: i = i + "index.shtml" # 判断文件是否存在 if os.path.exists(i): index_shtml.append(i) print("index.shtml列表如下") print(index_shtml) return index_shtml def get_all_gb2312(index_shtml): gb2312_list = [] for i in index_shtml: # with open(i, 'rb') as f: # if chardet.detect(f.read())['encoding'] == "GB2312": # gb2312_list.append(f) f = open(i, "rb") data = f.read() print(chardet.detect(data)["encoding"]) # 如果文件为Gb2312加入新列表 if (chardet.detect(data)["encoding"] == "GB2312"): gb2312_list.append(i) print("GB2312列表如下") print(gb2312_list) return gb2312_list def convert_to_utf8(gb2312_list): to_coding_type = "utf-8" from_coding_type = "ansi" jishuqi = 0 for i in gb2312_list: try: f = codecs.open(i, "rb", from_coding_type) new_content = f.read() codecs.open(i, "wb", to_coding_type).write(new_content) jishuqi += 1 except IOError as err: print("IO ERROR:".format(err)) print("本次转换%d个文件" % jishuqi) if __name__ == '__main__': run()