1.读excel
import xlrd

# Open the workbook and pick the worksheet by its tab name.
# NOTE(review): xlrd 2.x reads only .xls files; opening an .xlsx needs
# xlrd < 2.0 (or openpyxl) - confirm which version this is run with.
data = xlrd.open_workbook("Gitee.xlsx")
table = data.sheet_by_name("程序开发")

# Handy attributes: table.nrows (row count), table.ncols (column count),
# table.row_values(i) (row i as a list, 0-based).

# Print rows 1..80 (row 0 is skipped). The upper bound is clamped to the
# real row count so a sheet with fewer than 81 rows no longer raises
# IndexError.
row_stop = min(81, table.nrows)
for i in range(1, row_stop):
    print(table.row_values(i))
2.写excel ---openpyxl
# Write an .xlsx workbook with openpyxl.
from openpyxl import Workbook

wb = Workbook()  # start from a fresh workbook

# create_sheet(title, index): the optional number controls the tab position.
login_sheet = wb.create_sheet('index3', 0)
wb.create_sheet('index1')

# A sheet can be renamed afterwards through its title attribute.
login_sheet.title = 'login'

# Single cells can be addressed by coordinate or by row/column numbers.
for coordinate, number in (('A3', 666), ('A4', 444)):
    login_sheet[coordinate] = number
login_sheet.cell(row=6, column=3).value = 88888888
login_sheet['A5'] = '=sum(A3:A4)'  # stored as a formula string

# append() writes one list per row.
for record in (
    ['username', 'age', 'hobby'],
    ['nick', 28],
    ['nick', '', '秃头'],
):
    login_sheet.append(record)

# Save the newly built workbook.
wb.save('吴凯.xlsx')
3.写excel ---xlwt
import xlwt

# Create a workbook with UTF-8 encoding, then add one worksheet to it.
workbook = xlwt.Workbook(encoding='utf-8')
worksheet = workbook.add_sheet('sheet')

# Fill row 0, one label per column starting at column 0.
for column, text in enumerate(['核心参数', '参数1']):
    worksheet.write(0, column, label=text)

workbook.save('lichuang.xls')
4.with open 操作csv
import csv

row = ['凯帅', '美女', '人才']

# Open in append mode so existing rows are kept; newline="" leaves line
# ending handling to the csv module.
with open('data.csv', mode='a', newline="", encoding='utf-8') as filecsv:
    csv.writer(filecsv, delimiter=',').writerow(row)
5.操作word文档
import docx

# NOTE(review): the path contains no separators - backslashes may have been
# lost when this snippet was pasted; confirm the real location.
file = docx.Document("D:code客户脚本制作_guojieli出院小结.docx")

# python-docx rebuilds the paragraph list on every access to `.paragraphs`,
# so read it once instead of once per loop iteration.
paragraphs = file.paragraphs
print("段落数:", len(paragraphs))

# Text of every paragraph, in document order.
list1 = [para.text for para in paragraphs]

# Print each paragraph's 0-based index together with its text.
for i, text in enumerate(list1):
    print(f"第:{i}段的内容是:{text}")
6.excel格式操作
from openpyxl import Workbook
from openpyxl.styles import Alignment, PatternFill

wb = Workbook()
wb1 = wb.create_sheet('sh', 0)

# Merge A1:A2; the value and styling are then set on A1, the top-left cell
# of the merged range.
wb1.merge_cells("A1:A2")
wb1.cell(1, 1).value = '你就是个大石坝'
wb1['A1'].fill = PatternFill(fgColor="00C0C0C0", fill_type='solid')  # fill color
wb1['A1'].alignment = Alignment(horizontal="center", vertical="center")  # center both ways

wb1.row_dimensions[3].height = 20  # make row 3 taller
# column_dimensions is keyed by column letter, not by cell coordinate: the
# previous key 'D1' is not a valid column name and breaks wb.save().
wb1.column_dimensions['D'].width = 30  # make column D wider

# max_row is printed twice on purpose: the notes use it to compare the row
# count before and after an optional wb1.append([...]) placed between the
# two reads (currently disabled, so both prints show the same number).
nows = wb1.max_row
print(nows)
nows = wb1.max_row
print(nows)

wb.save('data.xlsx')
7.操作pdf
from pdfminer.pdfparser import PDFParser, PDFDocument
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LTTextBoxHorizontal, LAParams
from pdfminer.pdfinterp import PDFTextExtractionNotAllowed


def parse(DataIO):
    """Extract the text of every horizontal text box in a PDF.

    Args:
        DataIO: a PDF file object opened in binary mode.

    Returns:
        The text of all LTTextBoxHorizontal elements, concatenated in page
        order. (Earlier versions built this text and discarded it.)

    Raises:
        PDFTextExtractionNotAllowed: if the document reports that it is not
            extractable.
    """
    # Build a parser from the file object and link it with a document
    # object in both directions.
    parser = PDFParser(DataIO)
    doc = PDFDocument()
    parser.set_document(doc)
    doc.set_parser(parser)
    # Supply the initial password; none is given, so the default is used.
    doc.initialize()

    if not doc.is_extractable:
        raise PDFTextExtractionNotAllowed

    # Resource manager for shared resources, layout parameters, a device
    # that aggregates page layout, and the interpreter that drives it.
    rsrcmagr = PDFResourceManager()
    laparams = LAParams()
    device = PDFPageAggregator(rsrcmagr, laparams=laparams)
    interpreter = PDFPageInterpreter(rsrcmagr, device)

    chunks = []
    for page in doc.get_pages():
        interpreter.process_page(page)
        # LTPage layout object for the page just processed.
        layout = device.get_result()
        for x in layout:
            if not isinstance(x, LTTextBoxHorizontal):
                continue
            try:
                chunks.append(x.get_text())
            except Exception:
                # Best effort: report the failure and keep going with the
                # remaining elements.
                print("Failed")
    return "".join(chunks)


def main():
    """Open the sample PDF and return the text extracted from it."""
    # NOTE(review): the path contains no separators - backslashes may have
    # been lost when this snippet was pasted; confirm the real location.
    with open(r'D:code客户pdf转excel提取_穷人也要买东东Supplier release 232492 20200807201958.PDF', 'rb') as pdf_html:
        return parse(pdf_html)


if __name__ == "__main__":
    main()