改进的地方:
1. 以前是固定删除某几列;由于 Excel 的格式经常变化,现改为根据单元格是否含有中文来判断是否删除该列(编码那一列不含中文)。
2. 列宽自适应,方便查看数据。
3. 参考的文章已在代码中标注原文链接。
import os
import openpyxl
from openpyxl import Workbook
from copy import deepcopy
from openpyxl.utils import get_column_letter
# 原文:
# https://www.cnblogs.com/liuda9495/p/9039732.html
# https://blog.csdn.net/gaifuxi9518/article/details/80575880
# https://blog.csdn.net/COCO56/article/details/84590544
# http://www.voidcn.com/article/p-aadcasvw-bst.html
# Name of the branch being processed. It selects the input directory, the
# workbook loaded into workbook2 and the file name used when saving.
fenBuName = '五分部'
# Directory whose entries are listed and processed by each_files().
dirName = './所有分部/' + fenBuName+ '/'
#workbook2 = Workbook()
# Output workbook: starts from '<fenBuName>.xlsx' in the current working
# directory; Business() adds one sheet to it per processed source sheet.
workbook2 = openpyxl.load_workbook(fenBuName + '.xlsx')# load the Excel workbook
# Module-level state shared between the functions below through `global`:
# the source workbook being read, its sheet currently being processed, and
# the unit-project name read from that sheet.
workbook = None
worksheet = None
unit_name = None
def is_chinese(string):
    """Return True if *string* contains at least one Chinese character.

    A character counts as Chinese when it lies in the CJK Unified
    Ideographs range U+4E00 through U+9FFF.

    :param string: value to inspect, typically a worksheet cell value.
        ``None`` and non-string values (numbers, dates, ...) are treated
        as containing no Chinese instead of raising.
    :return: bool
    """
    if not isinstance(string, str):
        return False
    # The bounds must be real unicode escapes. Written as the plain text
    # 'u4e00' / 'u9fff' the comparison can never be true for a single
    # character, which made every column look "non-Chinese".
    return any('\u4e00' <= ch <= '\u9fff' for ch in string)
def each_files():
    """Process every entry of the input directory ``dirName``.

    Each entry name is appended to ``dirName`` to form a path, the path is
    printed, and the file is handed to each_sheet().
    """
    for entry in os.listdir(dirName):
        source_path = dirName + entry
        print(source_path)
        each_sheet(source_path)
def each_sheet(path):
    """Load the workbook at *path* and process each of its sheets.

    Rebinds the module-level ``workbook`` and, for every sheet in turn,
    ``worksheet``; CellSplit() and then Business() are run on that sheet.

    :param path: path of the Excel file to load
    """
    global workbook, worksheet
    workbook = openpyxl.load_workbook(path)
    # The sheet names are read once, before any sheet is processed.
    titles = workbook.sheetnames
    for title in titles:
        print(title)
        worksheet = workbook[title]
        CellSplit()
        Business()
def CellSplit():
    """Unmerge every merged range of the current sheet and fill it in.

    Works on the module-level ``worksheet``. Each merged range is split and
    every cell of the former range is assigned the value read from the
    range's top-left cell.
    """
    # Iterate over a deep copy of the merged ranges; presumably needed
    # because unmerge_cells() changes the collection while it is walked.
    for area in deepcopy(worksheet.merged_cells):
        # Row/column bounds are 1-based, the same as in Excel.
        top, bottom = area.min_row, area.max_row
        left, right = area.min_col, area.max_col
        worksheet.unmerge_cells(
            start_row=top,
            end_row=bottom,
            start_column=left,
            end_column=right,
        )
        anchor_value = worksheet.cell(top, left).value
        # A single-column range is simply the case left == right.
        for row in range(top, bottom + 1):
            for col in range(left, right + 1):
                worksheet.cell(row, col).value = anchor_value
def Business():  # business-specific logic
    """Normalise the current sheet and copy it into the output workbook.

    Operates on the module-level ``worksheet`` and ``workbook2`` and rebinds
    the module-level ``unit_name`` and ``worksheet2``. Steps, in order:

    1. Read the unit-project name from cell A5 and use it as the sheet title.
    2. Delete unwanted header rows; which rows depends on whether cell A2
       holds '单位工程' (reported as version 1) or not (version 2).
    3. Delete every column whose row-2 cell contains no Chinese text.
    4. Copy the remaining cell values into a new sheet of ``workbook2``
       named after the unit project.
    5. Set each output column's width from its longest value.
    """
    global worksheet2
    global workbook2
    global unit_name

    # Name of the unit project. An earlier variant read it from cell A2 and
    # kept only the text after the first ':'.
    unit_name = worksheet.cell(5, 1).value
    worksheet.title = unit_name  # rename the source sheet

    # Workbooks without a unit-project column are not handled yet; the
    # abandoned idea was to insert a first column filled with unit_name.

    # Delete rows; the rows to drop depend on the layout version. Rows are
    # removed from the bottom up so earlier deletions do not shift the
    # indices of the later ones.
    if worksheet.cell(2, 1).value == '单位工程':
        worksheet.delete_rows(3)
        worksheet.delete_rows(1)
        print('版本1')
    else:
        worksheet.delete_rows(4)
        worksheet.delete_rows(2)
        worksheet.delete_rows(1)
        print('版本2')

    # Automatically delete columns: a column is kept only if its row-2 cell
    # contains Chinese. The column count is captured up front and the
    # columns are visited right-to-left because deleting shifts the rest.
    max_column = worksheet.max_column
    for col in range(max_column, 0, -1):
        if not is_chinese(worksheet.cell(2, col).value):
            worksheet.delete_cols(col)

    worksheet2 = workbook2.create_sheet(unit_name)

    # Copy the cell values (values only) into the new sheet.
    for r in range(1, worksheet.max_row + 1):
        for c in range(1, worksheet.max_column + 1):
            worksheet2.cell(r, c).value = worksheet.cell(r, c).value

    # Auto-fit the column widths: at least 10, otherwise the longest value.
    for c in range(1, worksheet.max_column + 1):
        column_width = 10
        for r in range(1, worksheet.max_row + 1):
            cell_value = worksheet.cell(r, c).value
            if cell_value is not None:
                # str() so numeric/date cells are measured instead of
                # raising TypeError in len().
                column_width = max(column_width, len(str(cell_value)))
        column_letter = get_column_letter(c)
        # NOTE(review): the factor 2 presumably allows for double-width
        # Chinese characters - confirm.
        worksheet2.column_dimensions[column_letter].width = column_width * 2
# Script entry: process every file found in dirName, then write the
# accumulated output workbook to disk.
each_files()
# NOTE(review): presumably the './新的分部/' directory must already exist
# for save() to succeed - confirm.
workbook2.save('./新的分部/'+fenBuName+'.xlsx')
目录结构