zoukankan      html  css  js  c++  java
  • 查看文件编码 + 查看文件扩展名 + 文件编码转换

    参考资料:python中的encode()和decode()函数

    unicode编码转换

    查看文件编码 + 查看文件扩展名

    import os
    import sys
    import codecs
    import chardet
    
    def GetFileEncodingFormat(file):
        """Guess the character encoding of ``file`` using chardet.

        The file is read as raw bytes so that chardet inspects exactly what is
        stored on disk. Decoding the file first and re-encoding the text would
        hand chardet a UTF-8 rendition of the content instead of the original
        bytes, which defeats the detection.

        Args:
            file: path of the file to inspect.

        Returns:
            The value stored under "encoding" in chardet's detection result.
            NOTE(review): chardet reports None when it cannot decide (e.g. for
            an empty file) - callers should be prepared for that.

        Raises:
            OSError: if the file cannot be opened or read.
        """
        # The context manager closes the handle even if read() fails.
        with open(file, 'rb') as fileHandle:
            rawBytes = fileHandle.read()
        return chardet.detect(rawBytes)["encoding"]
    
    def GetFileExtension(file):
        """Return the extension of ``file``, including the leading dot.

        Only the last path component is examined, so dots in directory names
        are ignored. An empty string is returned when there is no extension.
        """
        baseName = os.path.split(file)[1]
        return os.path.splitext(baseName)[1]
    
    def CovertFileCodeFormat(file, out_encode):
        """Re-encode a C source/header file in place to ``out_encode``.

        Only files whose extension is ``.c`` or ``.h`` are touched. The current
        encoding is detected with GetFileEncodingFormat; the file is rewritten
        only when that encoding differs from ``out_encode``.

        Args:
            file: path of the file to convert.
            out_encode: name of the target encoding (any codec name Python
                knows, e.g. "utf-8").

        Raises:
            LookupError: if the detected encoding or ``out_encode`` is not a
                codec known to Python.

        I/O errors are reported on stdout and otherwise swallowed, so a batch
        run keeps going after a single unreadable file.
        """
        try:
            # Cheap filter first: no need to read files that are never converted.
            if GetFileExtension(file) not in ('.c', '.h'):
                return

            encoding = GetFileEncodingFormat(file)
            if encoding is None:
                # Without a detected encoding the content cannot be decoded
                # reliably; rewriting it could corrupt the file.
                print ("skip:" + file + " encoding not detected")
                return

            # Compare canonical codec names so that spellings such as "UTF-8"
            # and "utf8" are recognised as the same encoding.
            if codecs.lookup(encoding).name == codecs.lookup(out_encode).name:
                return

            # Read everything and close the handle before reopening the same
            # path for writing.
            with codecs.open(file, 'r', encoding, errors = 'ignore') as source:
                fileContext = source.read()
            with codecs.open(file, 'w', out_encode, errors = 'ignore') as target:
                target.write(fileContext)
            print ("convert:" + file + " sucess")
        except IOError as err:
            print ("I/O error: {0}".format(err))
            
    
    def ProcessDir(path):
        """Convert every file found below ``path``.

        The directory tree is walked recursively and each file is handed to
        CovertFileCodeFormat together with the target encoding taken from the
        second command-line argument (sys.argv[2]).
        """
        for parent, _subdirs, names in os.walk(path):
            for name in names:
                CovertFileCodeFormat(os.path.join(parent, name), sys.argv[2])
    
    def main():
        """Convert the file or directory tree named by sys.argv[1].

        A regular file is converted directly, a directory is processed
        recursively, and anything else is silently ignored. The target
        encoding is read from sys.argv[2].
        """
        target = sys.argv[1]
        if os.path.isfile(target):
            CovertFileCodeFormat(target, sys.argv[2])
            return
        if os.path.isdir(target):
            ProcessDir(target)
    

      查看文件编码+扩展名

    # Sample file used to try out the two helpers. The path is a raw string so
    # the backslashes (including the "\n" in "\njhcfx_1205") are kept literally.
    filepath = r'C:\Users\Administrator\Desktop\njhcfx_1205\zjtpymplan_1204.csv'
    GetFileEncodingFormat(filepath)
    GetFileExtension(filepath)
    
    def ProcessDir(path):
        """Print the detected encoding of every file found below ``path``.

        The directory tree is walked recursively; nothing is converted.
        """
        for parent, _subdirs, names in os.walk(path):
            for name in names:
                print(GetFileEncodingFormat(os.path.join(parent, name)))
    
    def main():
        """Report file encodings for the directory named by sys.argv[1].

        Only directories are processed; a regular file or a non-existent path
        results in no action.
        """
        target = sys.argv[1]
        # A regular file is deliberately left alone in this variant.
        if not os.path.isfile(target) and os.path.isdir(target):
            ProcessDir(target)
    

      文件编码转换

    def GB18030ToUTF8(path, new_path, chunksize):
        """Re-encode every CSV file below ``path`` from GB18030 to UTF-8 with BOM.

        Each file is read in chunks so that large files do not have to fit in
        memory, and written to ``new_path`` under the same file name.

        Args:
            path: directory that is walked recursively; every file found is
                parsed as a GB18030-encoded CSV.
            new_path: directory receiving the converted files; it is created
                when missing. Files are written directly into it, so files
                with the same name from different sub-directories overwrite
                each other.
            chunksize: number of rows read and written per chunk.
        """
        # pandas is not imported at the top of this file, so import it here to
        # keep the function usable on its own.
        import pandas as pd

        os.makedirs(new_path, exist_ok=True)
        for root, dirs, files in os.walk(path):
            for file in files:
                sourcePath = os.path.join(root, file)
                targetPath = os.path.join(new_path, file)
                chunks = pd.read_csv(sourcePath, chunksize=chunksize, encoding='gb18030',
                                     engine='python', dtype=str, na_values='')
                for index, chunk in enumerate(chunks):
                    # The first chunk (re)creates the file and writes the header
                    # row; every later chunk is appended without a header.
                    isFirst = index == 0
                    chunk.to_csv(targetPath, encoding='utf_8_sig',
                                 mode='w' if isFirst else 'a',
                                 index=False, header=isFirst)
    
    # Test run: convert everything in the sample directory into a sibling
    # directory whose name has "1" appended.
    chunksize = 1000000
    path = r'C:\Users\Administrator\Desktop\njhcfx_1205'
    GB18030ToUTF8(path, path+'1', chunksize)
    
    # Check that the converted file can be read back
    #filepath = r'C:\Users\Administrator\Desktop\njhcfx_12051\zjtpjl_1204.csv'
    #aa = pd.read_csv(filepath, encoding='utf_8_sig', dtype=str)
    

      

    #content = open(filepath).read().decode("gb18030")
    #open("C:\Users\Administrator\Desktop\njhcfx_1205\zjtpymplan_1205.txt","w").write(content.encode("utf8"))



  • 相关阅读:
    懒加载 和 json
    [iOS]用instancetype代替id作返回类型有什么好处?
    (转)Objective-C语法之KVC使用
    UITableView 展示数据
    shopee
    防火墙
    vue项目开发技巧
    文件流
    vant
    node 使用
  • 原文地址:https://www.cnblogs.com/iupoint/p/12054205.html
Copyright © 2011-2022 走看看