zoukankan      html  css  js  c++  java
  • python3 把excel文件合并并保存到csv文件

      具体是这样,某路径下有很多 excel文件,文件名中包含相同关键字的是一类文件,把包含相同关键字的文件合并成一个文件,生成一个新的csv文件

      

    # coding=utf-8
    import xlrd
    import xlwt
    import datetime
    import os
    import time
    import httplib2
    
    # 具体是这样,某路径下有很多 excel文件,文件名中包含相同关键字的是一类文件,把包含相同关键字的文件合并成一个文件,生成一个新的csv文件
    class csvFile:
        def WriteLog(self, message, fileName):
            with open(fileName, 'a') as f:
                f.write(message)
    
        def combinExcelToCsv(self,keyword_list,mainExceldir = "D:\work\Excel_txtProcesss\new-微博",csvDir="D:\work\Excel_txtProcesss\new-微博-合并"):
    
            # 把文件名中包含上述文件列表的文件合并成一个文件,比如最新微博-new77723-长城.xls和最新微博-new60000-长城.xls 合并成一个文件:长城.csv
            key_filelist_dict = {}
            for root, dirs, files in os.walk(mainExceldir):
                for key in keyword_list:
                    filelist = []
                    for file in files:
                        strfilename = mainExceldir + "\" + file
                        if file.find(key) > -1:
                            filelist.append(strfilename)
                    if len(filelist) > 0:
                        key_filelist_dict[key] = filelist
    
            for key1 in key_filelist_dict.keys():
                filename = os.path.join(csvDir, key1 + ".csv")
                file_list = key_filelist_dict[key1]
                file_index = 0
    
                for file in file_list:
                    print(file)
                    try:
                        data = xlrd.open_workbook(file)
                        table = data.sheets()[0]
                        nrows = table.nrows
                        ncols = table.ncols
    
                        # 文件标题
                        if file_index==0:
                            rowValues = table.row_values(0)
                            headtitle = ""
                            for colIndex  in range(0,ncols):
                                headtitle += str(rowValues[colIndex])+","
                            self.WriteLog(headtitle, filename)
    
                        file_index+=1
                        for i in range(1, nrows):
                            rowValues = table.row_values(i)
                            message = ""
                            for colIndex in range(0, ncols):
                                message += str(rowValues[colIndex]).replace(",", "") + ","
                            print(i)
                            message = message.replace("
    ", "")
                            message = message.encode("gbk", "ignore").decode("gbk")
                            self.WriteLog("
    "+message, filename)
                    except UnicodeDecodeError as rr:
                        print("error:" + file)
                        print(rr.args)
    
    
    obj = csvFile()
    keyword_list = ["BOSS直聘", "VIVO", "蒙牛", "宝马", "拼多多", "OPPO", "上汽通用", "小米", "优信", "长城", "58", "百威", "东风", "东鹏",
                    "富连网", "链家", "青岛", "一汽大众", "梅赛德斯-奔驰"]
    # 存放excel文件路径
    mainExceldir="D:\work\Excel_txtProcesss\new-微博"
    # 要生成的csv文件路径
    csvDir="D:\work\Excel_txtProcesss\new-微博-合并"
    obj.combinExcelToCsv(keyword_list,mainExceldir)
  • 相关阅读:
    HDU 4814 Golden Radio Base
    我对Swift的几点疑问
    【UTR #1】ydc的大树
    jsp中的隐含9对象
    动作元素
    指令元素
    JSP语法
    设计模式六大原则(6):开闭原则
    设计模式六大原则(5):迪米特法则
    设计模式六大原则(4):接口隔离原则
  • 原文地址:https://www.cnblogs.com/shaosks/p/9354960.html
Copyright © 2011-2022 走看看