zoukankan      html  css  js  c++  java
  • Python-docx模块

    Python-docx

    python-docx包可以用来创建docx文档,并对现有文档进行更改;段落、分页符、表格、图片、标题、样式等Word文档中几乎所有的常用功能都支持。

    注意:python-docx只能解析.docx文件,无法解析旧版的.doc文件。

    官方文档:

    https://python-docx.readthedocs.io/en/latest/user/text.html

    https://python-docx.readthedocs.io/en/latest/index.html

    安装使用

    pip3 install python-docx
    

    案例一

    from docx import Document  # entry point for creating/opening documents
    from docx.shared import Inches  # length unit: inches
    from docx.shared import Pt  # length unit: points (font sizes, spacing)
    from docx.enum.text import WD_ALIGN_PARAGRAPH
    from docx.oxml.ns import qn  # builds namespace-qualified XML names such as 'w:eastAsia'
    from docx.shared import RGBColor

    # Create a document object (no path is passed, so nothing is read from disk)
    document = Document()

    # Append a paragraph
    paragraph = document.add_paragraph('This is a demo.')

    # Insert a new paragraph directly before an existing one
    prior_paragraph = paragraph.insert_paragraph_before('welcome!')

    # Run properties such as bold/italic/underline are tri-state:
    # True switches the property on, False switches it off,
    # None (the default) means the value is inherited.
    paragraph = document.add_paragraph()
    paragraph.add_run('Lorem ipsum')
    run = paragraph.add_run(' dolor')
    run.bold = True
    run.font.name = '宋体'
    # font.name alone does not cover East Asian text; the w:eastAsia
    # attribute of the run's rFonts element has to be set as well.
    r = run._element
    r.rPr.rFonts.set(qn('w:eastAsia'), '宋体')
    paragraph.add_run(' hello').underline = True
    paragraph.add_run('斜体、').italic = True
    paragraph.add_run('设置中文字体,')
    paragraph.add_run('设置字号').font.size = Pt(24)

    # Add a paragraph of text
    p = document.add_paragraph('test')

    # Center the text.
    # WD_ALIGN_PARAGRAPH holds the available alignments,
    # e.g. WD_ALIGN_PARAGRAPH.LEFT and WD_ALIGN_PARAGRAPH.RIGHT.
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Indentation and spacing live on the paragraph's ParagraphFormat object,
    # not on the Paragraph itself; assigning them on `p` directly would only
    # create an unused Python attribute and leave the document unchanged.
    paragraph_format = p.paragraph_format

    # Left indent
    paragraph_format.left_indent = Inches(0.3)

    # First-line indent
    paragraph_format.first_line_indent = Inches(0.3)

    # Space before the paragraph
    paragraph_format.space_before = Pt(18)

    # Space after the paragraph
    paragraph_format.space_after = Pt(12)

    # Add headings
    document.add_heading('The REAL meaning of the universe')
    document.add_heading('The role of dolphins', level=2)

    # Add a quotation
    document.add_paragraph('Intese quote', style="Intense Quote")

    # Add a page break
    document.add_page_break()

    # Add a table
    table = document.add_table(rows=2, cols=2)
    cell = table.cell(0, 0)
    cell.text = 'cell_00'
    table.cell(0, 1).text = 'cell_01'
    row = table.rows[1]
    row.cells[0].text = 'cell_10'
    row.cells[1].text = 'cell_11'

    # Count rows and columns
    row_count = len(table.rows)
    col_count = len(table.columns)

    # Add a picture (the image file must exist next to the script)
    document.add_picture('1.png', width=Inches(1.25))

    # Apply a character style to a run
    paragraph = document.add_paragraph('Normal text, ')
    paragraph.add_run('text with emphasis', 'Emphasis')

    # Add a numbered (ordered) list
    document.add_paragraph('有序列表元素1', style='List Number')
    document.add_paragraph('有序列表元素2', style='List Number')

    # Add a bulleted (unordered) list
    document.add_paragraph('无序列表元素1', style='List Bullet')
    document.add_paragraph('无序列表元素2', style='List Bullet')
    # Alternatively, assign the style after creating the paragraph:
    #   paragraph = document.add_paragraph('Lorem ipsum dolor sit amet.')
    #   paragraph.style = 'List Bullet'

    document.save('test.docx')
    

    案例二

    from docx import Document
    from docx.shared import Inches

    document = Document()

    # Add a title and set its level (range 0 to 9, default 1)
    document.add_heading('Document Title', 0)

    # Add a paragraph; the text may contain tab (\t), newline (\n)
    # or carriage-return (\r) characters
    p = document.add_paragraph('A plain paragraph having some ')
    # Append runs of text to the paragraph, optionally styling each one
    p.add_run('bold').bold = True
    p.add_run(' and some ')
    p.add_run('italic.').italic = True

    document.add_heading('Heading, level 1', level=1)
    document.add_paragraph('Intense quote', style='Intense Quote')

    # Add bulleted list items (leading dot)
    document.add_paragraph('first item in unordered list', style='List Bullet')
    document.add_paragraph('second item in unordered list', style='List Bullet')

    # Add numbered list items (leading number)
    document.add_paragraph('first item in ordered list', style='List Number')
    document.add_paragraph('second item in ordered list', style='List Number')

    # Add a picture (the image file must exist next to the script)
    document.add_picture('monty-truth.png', width=Inches(1.25))

    records = (
        (3, '101', 'Spam'),
        (7, '422', 'Eggs'),
        (4, '631', 'Spam, spam, eggs, and spam'),
    )

    # Add a table with one row and three columns.
    # Some of the available table styles:
    #   Normal Table
    #   Table Grid
    #   Light Shading, Light Shading Accent 1 .. Light Shading Accent 6
    #   Light List, Light List Accent 1 .. Light List Accent 6
    #   Light Grid, Light Grid Accent 1 .. Light Grid Accent 6
    #   ... and many more
    table = document.add_table(rows=1, cols=3, style='Light Shading Accent 2')
    # Cells of the first (header) row
    hdr_cells = table.rows[0].cells
    # Fill in the three header cells
    hdr_cells[0].text = 'Qty'
    hdr_cells[1].text = 'Id'
    hdr_cells[2].text = 'Desc'
    # `item_id` rather than `id`, so the builtin id() is not shadowed
    for qty, item_id, desc in records:
        # Append a row to the table and get its cells
        row_cells = table.add_row().cells
        row_cells[0].text = str(qty)
        row_cells[1].text = item_id
        row_cells[2].text = desc

    document.add_page_break()

    # Save the .docx document
    document.save('demo.docx')
    

    读取word文档

    from docx import Document

    doc = Document('demo.docx')

    # Fetch the paragraph list once and reuse it for both loops below
    paragraphs = doc.paragraphs

    # Text of every paragraph
    for para in paragraphs:
        print(para.text)

    # Index and text of every paragraph
    for i, para in enumerate(paragraphs):
        print(i, para.text)

    # Tables
    for tb in doc.tables:
        # Rows
        for row in tb.rows:
            # Cells (one per column)
            for cell in row.cells:
                print(cell.text)
                # Alternatively, build the text from the cell's paragraphs
                # (this variant concatenates them without any separator):
                #   text = ''.join(p.text for p in cell.paragraphs)
                #   print(text)
    
  • 相关阅读:
    Python 之 raw_input()与input()区别
    Python基础语法
    在Cloudera Hadoop CDH上安装R及RHadoop(rhdfs、rmr2、rhbase、RHive)
    MapReduce 过程详解
    Cloudera Manager and CDH安装及配置
    RFC 目录
    聊一聊 tcp拥塞控制 九 fack
    聊一聊tcp 拥塞控制 八 相关数据结构&& 概念
    聊一聊 tcp 拥塞控制 七 转载
    udp connected socket
  • 原文地址:https://www.cnblogs.com/chenwenyin/p/13557353.html
Copyright © 2011-2022 走看看