具体是这样,某路径下有很多 excel文件,文件名中包含相同关键字的是一类文件,把包含相同关键字的文件合并成一个文件,生成一个新的csv文件
# coding=utf-8 import xlrd import xlwt import datetime import os import time import httplib2 # 具体是这样,某路径下有很多 excel文件,文件名中包含相同关键字的是一类文件,把包含相同关键字的文件合并成一个文件,生成一个新的csv文件 class csvFile: def WriteLog(self, message, fileName): with open(fileName, 'a') as f: f.write(message) def combinExcelToCsv(self,keyword_list,mainExceldir = "D:\work\Excel_txtProcesss\new-微博",csvDir="D:\work\Excel_txtProcesss\new-微博-合并"): # 把文件名中包含上述文件列表的文件合并成一个文件,比如最新微博-new77723-长城.xls和最新微博-new60000-长城.xls 合并成一个文件:长城.csv key_filelist_dict = {} for root, dirs, files in os.walk(mainExceldir): for key in keyword_list: filelist = [] for file in files: strfilename = mainExceldir + "\" + file if file.find(key) > -1: filelist.append(strfilename) if len(filelist) > 0: key_filelist_dict[key] = filelist for key1 in key_filelist_dict.keys(): filename = os.path.join(csvDir, key1 + ".csv") file_list = key_filelist_dict[key1] file_index = 0 for file in file_list: print(file) try: data = xlrd.open_workbook(file) table = data.sheets()[0] nrows = table.nrows ncols = table.ncols # 文件标题 if file_index==0: rowValues = table.row_values(0) headtitle = "" for colIndex in range(0,ncols): headtitle += str(rowValues[colIndex])+"," self.WriteLog(headtitle, filename) file_index+=1 for i in range(1, nrows): rowValues = table.row_values(i) message = "" for colIndex in range(0, ncols): message += str(rowValues[colIndex]).replace(",", ",") + "," print(i) message = message.replace(" ", "") message = message.encode("gbk", "ignore").decode("gbk") self.WriteLog(" "+message, filename) except UnicodeDecodeError as rr: print("error:" + file) print(rr.args) obj = csvFile() keyword_list = ["BOSS直聘", "VIVO", "蒙牛", "宝马", "拼多多", "OPPO", "上汽通用", "小米", "优信", "长城", "58", "百威", "东风", "东鹏", "富连网", "链家", "青岛", "一汽大众", "梅赛德斯-奔驰"] # 存放excel文件路径 mainExceldir="D:\work\Excel_txtProcesss\new-微博" # 要生成的csv文件路径 csvDir="D:\work\Excel_txtProcesss\new-微博-合并" obj.combinExcelToCsv(keyword_list,mainExceldir)