关于excel,大部分操作应该主要用于数据分析,其中,最主流的方式应该是采用pandas库,这里主要介绍几种方式来读写excel
读取excel数据
处理一般的excel,尤其没有合并单元格之类的情况时,我们可以采用普通的方法
这里采用xlrd库
-
无单元格合并的情况下
import xlrd def parse_list(data): """ parse a list :param data: list :return: """ data_list = [] for i in range(1, len(data)): data_dict = {} for index, key in enumerate(data[0]): data_dict[key] = data[i][index] data_list.append(data_dict) return data_list def read_excel(filename, sheet=None): """ read excel and parse every line :param name: file of absolute path :return: some dict which is about everyline of list """ workbook = xlrd.open_workbook(filename) total_data = [] if sheet: sh = workbook.sheet_by_name(sheet) for row in range(0, sh.nrows): row_list = sh.row_values(row) total_data.append(row_list) return parse_list(total_data)
上述方法主要针对特定的sheet,当然,根据上述方法也也可进行改变,读取所有的sheet
关键方法在于read_excel,所以我们来改写该方法
import xlrd def read_excel(filename): """ read excel and parse every line :param name: file of absolute path :return: some dict which is about everyline of list """ workbook = xlrd.open_workbook(filename) total_data = [] name_sheets = workbook.sheet_names() for sheet in name_sheets: sh = workbook.sheet_by_name(sheet) for row in range(0, sh.nrows): row_list = sh.row_values(row) total_data.append(row_list) return parse_list(total_data)
-
主要用于处理合并存在合并单元格的情况
import xlrd def get_excel(): data_list = [] with xlrd.open_workbook(r'D: 325.xlsx') as workbook: name_sheets = workbook.sheet_names() # 获取Excel的sheet表列表,存储是sheet表名 for index in name_sheets: # for 循环读取每一个sheet表的内容 apply_dic = [] sheet_info = workbook.sheet_by_name(index) # 根据表名获取表中的所有内容,sheet_info也是列表,列表中的值是每个单元格里值 first_line = sheet_info.row_values(0) # 获取首行,我这里的首行是表头,我打算用表头作为字典的key,每一行数据对应表头的value,每一行组成一个字典 values_merge_cell = merge_cell(sheet_info) # 这里是调用处理合并单元格的函数 for i in range(1, sheet_info.nrows): # 开始为组成字典准备数据 other_line = sheet_info.row_values(i) for key in values_merge_cell.keys(): if key[0] == i: other_line[key[1]] = values_merge_cell[key] # print(other_line) dic = list_dic(first_line, other_line) # 调用组合字典的函数,传入key和value,字典生成 apply_dic.append(dic) data_list.append({'index': index, 'data': apply_dic}) return data_list def list_dic(list1, list2): ''' two lists merge a dict,a list as key,other list as value :param list1:key :param list2:value :return:dict ''' dic = dict(map(lambda x, y: [x, y], list1, list2)) return dic def merge_cell(sheet_info): ''' #handle Merge transverse cells and handle Merge Vertical Cells, assign empty cells, :param rlow:row, include row exclusive of row_range :param rhigh:row_range :param clow:col, include col exclusive of col_range :param chigh:col_range :param sheet_info:object of sheet :return:dic contain all of empty cells value ''' merge = {} merge_cells = sheet_info.merged_cells for (rlow, rhigh, clow, chigh) in merge_cells: value_mg_cell = sheet_info.cell_value(rlow, clow) if rhigh - rlow == 1: # Merge transverse cells for n in range(chigh - clow - 1): merge[(rlow, clow + n + 1)] = value_mg_cell elif chigh - clow == 1: # Merge Vertical Cells for n in range(rhigh - rlow - 1): merge[(rlow + n + 1, clow)] = value_mg_cell return merge if __name__ == '__main__': get_excel()
写excel
-
生成xlsx,使用pandas
import pandas as pd def write_excel(data, filename, keys=''): data_df = pd.DataFrame(data) if keys != '': data_df.columns = list(keys) writer = pd.ExcelWriter('{}.xlsx'.format(filename)) data_df.to_excel(writer, index=None) writer.save()
-
生成csv
import csv def write_csv(name, data_list): fieldnames = data_list[0] print(fieldnames) with open(name + '.csv', mode='w', newline='', encoding='utf-8-sig') as csv_file: writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() for data in data_list: writer.writerow(data)
当然,可以通过json去生成csv
import json def json_to_csv(name): with open(name) as f: a = json.loads(f.read()) write_csv(name, a)
这里依旧可以使用pandas
def data_to_csv(data_list, csv_key, name): """ save data to csv :param data_list: data :param csv_key: csv column :param name: file name :return: """ final_data = [] for data in data_list: sign_key = [] for key, value in data.items(): sign_key.append(value) final_data.append(sign_key) # 将总数据转化为data frame再输出 df = pd.DataFrame(data=final_data, columns=csv_key) df.to_csv(name + '.csv', index=False, encoding='utf-8_sig')