首先贴出四种方法适用范围比较:
注释:Excel 2003(.xls 文件)每个工作表最多 65536 行、256 列,因此不适合处理大文件;Excel 2007 及以上版本(.xlsx 文件)的上限为 1048576 行、16384 列。
下面着重介绍如何使用 openpyxl 操作 Excel(仅支持 .xlsx 文件)。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os

import openpyxl


class Excel_Operate():
    """Class-method helpers to create, append to and read .xlsx files.

    Built on openpyxl, which only handles the .xlsx format; its advantage
    is that it can read and write large files.
    """

    # Kept only so existing references to ``Excel_Operate.wb`` keep working.
    # ``create`` no longer uses it: building every file from this one shared
    # workbook made sheets added by earlier calls leak into later files.
    wb = openpyxl.Workbook()

    @classmethod
    def create(cls, ex_path, name, sheet_name=None, sheet_names=None):
        """Create a new workbook file unless it already exists.

        :param ex_path: directory to save into; created when missing
        :param name: file name of the workbook, e.g. ``"export.xlsx"``
        :param sheet_name: title for the default (first) sheet; the
            openpyxl default title is kept when this is empty
        :param sheet_names: iterable of names for additional sheets; every
            item is converted with ``str()``
        :return: ``True`` when the file already exists (nothing is
            written), ``None`` after a new file has been saved
        """
        # An empty directory means "current directory"; os.makedirs("")
        # would raise, so only create a directory that is actually named.
        if ex_path and not os.path.exists(ex_path):
            os.makedirs(ex_path)

        ex_path_name = os.path.join(ex_path, name)
        if os.path.exists(ex_path_name):
            return True

        # A fresh workbook per call keeps files independent of each other.
        workbook = openpyxl.Workbook()
        default_sheet = workbook.active
        if sheet_name:
            default_sheet.title = sheet_name

        # ``sheet_names`` defaults to None rather than a shared mutable list.
        for extra_name in sheet_names or ():
            workbook.create_sheet(str(extra_name))

        workbook.save(ex_path_name)
        return None

    @classmethod
    def add_to_data(cls, ex_path_name, data, sheetname):
        """Append rows to a named sheet of an existing workbook and save it.

        :param ex_path_name: full path of the workbook file
        :param data: iterable of rows, e.g. ``[[1, 2, 3], [4, 5, 6]]``
        :param sheetname: name of the sheet to append to; looking up a
            name that is not in the workbook raises ``KeyError``
        :return: ``{"status": False, "message": ...}`` when the file does
            not exist, otherwise ``None``
        """
        if not os.path.exists(ex_path_name):
            return {"status": False, "message": "文件不存在"}

        workbook = openpyxl.load_workbook(ex_path_name)
        # ``workbook.sheetnames`` lists every sheet name if one is needed.
        sheet = workbook[sheetname]  # select the sheet by its name
        for row in data:
            sheet.append(row)
        workbook.save(ex_path_name)
        return None

    @classmethod
    def read_data(cls, ex_path_name, is_col, is_row=True):
        """Read every cell value of the first sheet in a workbook.

        ``is_row`` takes precedence: when it is true the data is returned
        row by row and ``is_col`` is ignored.

        :param ex_path_name: full path of the workbook file
        :param is_col: return a list of columns (used only when ``is_row``
            is false)
        :param is_row: return a list of rows
        :return: a list of lists of cell values, or ``[]`` when both flags
            are false
        """
        workbook = openpyxl.load_workbook(ex_path_name)
        sheet = workbook[workbook.sheetnames[0]]

        # Worksheet coordinates are 1-based.
        row_numbers = range(1, sheet.max_row + 1)
        col_numbers = range(1, sheet.max_column + 1)

        if is_row:
            return [
                [sheet.cell(row=r, column=c).value for c in col_numbers]
                for r in row_numbers
            ]
        if is_col:
            return [
                [sheet.cell(row=r, column=c).value for r in row_numbers]
                for c in col_numbers
            ]
        return []


if __name__ == "__main__":
    Excel_Operate.create(
        "/tmp/abcd/202108",
        "数据导出.xlsx",
        sheet_name=u"物流数据",
        sheet_names=[1, 2, 3],
    )
    Excel_Operate.add_to_data(
        "/tmp/abcd/202108/数据导出.xlsx",
        data=[[1, 2, 3], [4, 5, 6]],
        sheetname=u"物流数据",
    )

    # Example: diff the first column of two workbooks.
    # new_data = Excel_Operate.read_data("/tmp/excel/6_new.xlsx", is_col=True, is_row=False)
    # old_data = Excel_Operate.read_data("/tmp/excel/6_old.xlsx", is_col=True, is_row=False)
    # print(len(new_data[0]), len(old_data[0]))
    #
    # new_cha_data = set(new_data[0]) - set(old_data[0])
    # print(new_cha_data)
    #
    # old_cha_data = set(old_data[0]) - set(new_data[0])
    # print(old_cha_data)