zoukankan      html  css  js  c++  java
  • Python 导入分部分项前的 数据处理2

    改进的地方:
    以前是固定删除某几列;由于 Excel 表格经常更换格式,现改为按单元格是否含有中文来判断是否删除该列(编码那一列不含中文)。
    列宽自适应,方便查看数据。
    参考的文章已在代码里标注原文链接。

    import os
    import openpyxl
    from openpyxl import Workbook
    from copy import deepcopy
    from openpyxl.utils import get_column_letter
    
    
    # 原文:
    # https://www.cnblogs.com/liuda9495/p/9039732.html
    # https://blog.csdn.net/gaifuxi9518/article/details/80575880
    # https://blog.csdn.net/COCO56/article/details/84590544
    # http://www.voidcn.com/article/p-aadcasvw-bst.html
    
    
    
    # Name of the division being processed; also used to build the file names below.
    fenBuName = '五分部'
    # Directory whose entries are processed one by one by each_files().
    dirName = './所有分部/' + fenBuName + '/'


    # Workbook that receives one new sheet per processed source sheet and is
    # saved at the end of the script.
    workbook2 = openpyxl.load_workbook(fenBuName + '.xlsx')

    # Shared state filled in while processing: the source workbook currently
    # open, the sheet currently being handled, and that sheet's unit name.
    workbook = worksheet = unit_name = None
    
    
    
    
    def is_chinese(string):
        """
        Report whether a value contains at least one Chinese character.

        Only the CJK Unified Ideographs range U+4E00 to U+9FFF is checked.

        :param string: value to inspect; ``None`` and non-string values
            (for example numeric cell values) count as containing no Chinese
        :return: bool
        """
        if not isinstance(string, str):
            return False
        # The bounds must be real escape sequences: the plain text 'u4e00' is a
        # five-character ASCII string, which no single character falls after
        # while also falling before 'u9fff'.
        return any('\u4e00' <= ch <= '\u9fff' for ch in string)
        
        
    
    
    def each_files():
        """Process every entry of the ``dirName`` directory.

        Each entry name is appended to ``dirName``; the resulting path is
        printed and then handed to ``each_sheet``.
        """
        for entry in os.listdir(dirName):
            workbook_path = dirName + entry
            print(workbook_path)
            each_sheet(workbook_path)
    
    
    
    
    def each_sheet(path):
        """Load the workbook at ``path`` and process each of its sheets in turn.

        The loaded workbook and the sheet being processed are stored in the
        module-level ``workbook`` and ``worksheet`` names before ``CellSplit``
        and ``Business`` are called for that sheet.

        :param path: path of the Excel file to load
        """
        global workbook, worksheet

        workbook = openpyxl.load_workbook(path)
        # The list of sheet names is taken once, before the loop starts.
        for title in workbook.sheetnames:
            print(title)
            worksheet = workbook[title]
            CellSplit()
            Business()
    
    
    
    
    def CellSplit():
        """Unmerge every merged range of ``worksheet`` and fill in its cells.

        After a range is unmerged, the value read from its top-left cell is
        written to every cell the range covered.
        """
        # Loop over a deep copy so the iteration does not run on the same
        # collection that unmerge_cells() is being called against.
        for merged in deepcopy(worksheet.merged_cells):
            # Row and column indexes are 1-based, exactly as in Excel.
            top, bottom = merged.min_row, merged.max_row
            left, right = merged.min_col, merged.max_col

            worksheet.unmerge_cells(
                start_row=top, end_row=bottom,
                start_column=left, end_column=right,
            )

            fill_value = worksheet.cell(top, left).value
            for row in range(top, bottom + 1):
                for col in range(left, right + 1):
                    worksheet.cell(row, col).value = fill_value
        
    
    
    
    
    def Business():
        """Apply the project-specific clean-up to ``worksheet`` and copy it out.

        Steps, in order:

        1. Read the unit name from cell A5 and use it as the sheet title.
        2. Delete header rows; which ones depends on whether cell A2 holds
           '单位工程' (reported as "版本1") or not (reported as "版本2").
        3. Delete every column whose second-row cell contains no Chinese text.
        4. Copy the remaining values into a new sheet of ``workbook2`` that is
           named after the unit.
        5. Widen each output column to fit its longest value.

        Reads the module-level ``worksheet``; assigns ``unit_name`` and
        ``worksheet2``.
        """
        global worksheet2
        global workbook2
        global unit_name

        # NOTE: an earlier variant took the name from cell A2, keeping only the
        # text after the first ':'. Sheets without a unit row are not handled.
        unit_name = worksheet.cell(5, 1).value
        worksheet.title = unit_name

        # Rows are removed bottom-up so earlier deletions do not shift the
        # indexes of the rows still to be removed.
        if worksheet.cell(2, 1).value == '单位工程':
            worksheet.delete_rows(3)
            worksheet.delete_rows(1)
            print('版本1')
        else:
            worksheet.delete_rows(4)
            worksheet.delete_rows(2)
            worksheet.delete_rows(1)
            print('版本2')

        # Walk the columns right-to-left: the column count is read once, and
        # deleting a column never moves one that still has to be checked.
        for col in range(worksheet.max_column, 0, -1):
            header = worksheet.cell(2, col).value
            # Non-string cells (numbers, dates, empty) cannot contain Chinese.
            if not (isinstance(header, str) and is_chinese(header)):
                worksheet.delete_cols(col)

        worksheet2 = workbook2.create_sheet(unit_name)

        # Copy the values and measure the widest entry of each column in a
        # single pass over the sheet.
        row_count = worksheet.max_row
        col_count = worksheet.max_column
        for col in range(1, col_count + 1):
            widest = 10  # columns never get narrower than this
            for row in range(1, row_count + 1):
                value = worksheet.cell(row, col).value
                worksheet2.cell(row, col).value = value
                if value is not None:
                    # str() so numeric cells are measured instead of raising.
                    widest = max(widest, len(str(value)))
            # Doubled, presumably because Chinese characters render about two
            # units wide -- confirm against the target spreadsheets.
            worksheet2.column_dimensions[get_column_letter(col)].width = widest * 2
    
    
    
    
    
    # Process every source workbook, then write the collected sheets out.
    each_files()
    output_path = './新的分部/' + fenBuName + '.xlsx'
    workbook2.save(output_path)
    
    




    目录结构



  • 相关阅读:
    批量修改横断面图高程范围
    VS添加命令直接创建pkt文件
    Msi中文件替换
    Vs2015 当前不会命中断点,没有与此关联的可执行代码
    纵断面图标注栏数据复制
    批量修改曲面样式中的显示模式
    《AutoCAD Civil 3D .NET二次开发》勘误2
    AutoCAD .NET Wizard下载地址
    样例文件C3DCustomUI无法编译、加载
    angular2 datePipe IOS不兼容问题
  • 原文地址:https://www.cnblogs.com/guxingy/p/13519059.html
Copyright © 2011-2022 走看看