zoukankan      html  css  js  c++  java
  • python从TXT创建PDF文件——reportlab

    使用reportlab创建PDF文件
    电子书一般都是txt格式的,某些电子阅读器不能读取txt的文档,如DPT-RP1。因此本文使用python实现txt到pdf的转换,并且支持生成目录,目录项能够生成可点击的链接(前提是在txt文件中能够知道每个章节的位置),支持中文。

    reportlab的使用可以查看reportlab官方文档。txt转pdf详细代码如下:

    # coding: utf-8

    # Set the interpreter-wide default string encoding to utf-8.
    # NOTE(review): reload(sys) / sys.setdefaultencoding() is a Python 2-only
    # idiom; presumably it is needed so that implicit unicode <-> byte-string
    # conversions elsewhere in this script use UTF-8 -- confirm before removing.
    import sys
    reload(sys)
    sys.setdefaultencoding('utf-8')

    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.ttfonts import TTFont
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.platypus import BaseDocTemplate, Frame, PageTemplate, Paragraph
    from reportlab.platypus.tableofcontents import TableOfContents
    from reportlab.platypus import PageBreak
    from reportlab.lib.pagesizes import A4

    # Register the two TrueType fonts under the names the styles below use.
    # The font files are looked up relative to the current directory.
    pdfmetrics.registerFont(TTFont('STSONG', './STSONG.TTF'))
    pdfmetrics.registerFont(TTFont('simhei', './simhei.ttf'))

    styles = getSampleStyleSheet()

    # 'STSONG': style used for ordinary text paragraphs.
    styles.add(ParagraphStyle(
        name='STSONG',
        fontName='STSONG',
        fontSize=12,
        leading=20,
        firstLineIndent=22,
        wordWrap='CJK',
    ))

    # 'simhei': style used for chapter headings (reported as TOC entries).
    styles.add(ParagraphStyle(
        name='simhei',
        fontName='simhei',
        fontSize=14,
        leading=25,
        wordWrap='CJK',
    ))

    class MyDocTemplate(BaseDocTemplate):
        """Document template that reports heading paragraphs as TOC entries."""

        def __init__(self, filename, **kw):
            """Initialise the template.

            Args:
                filename: output target, passed through to BaseDocTemplate.
                **kw: keyword options forwarded unchanged to BaseDocTemplate.
            """
            # Assigned before the base initialiser runs, as in the original.
            # NOTE(review): the base initialiser may overwrite this -- confirm.
            self.allowSplitting = 0
            # Call the base initialiser directly; the apply() builtin is
            # deprecated in Python 2 and does not exist in Python 3.
            BaseDocTemplate.__init__(self, filename, **kw)

        # Entries to the table of contents can be done either manually by
        # calling the addEntry method on the TableOfContents object or
        # automatically by sending a 'TOCEntry' notification in the
        # afterFlowable method of the DocTemplate you are using. The data to
        # be passed to notify is a list of three or four items containing a
        # level number, the entry text, the page number and an optional
        # destination key which the entry should point to. This list will
        # usually be created in a document template's method like
        # afterFlowable(), making notification calls using the notify()
        # method with appropriate data.

        def afterFlowable(self, flowable):
            """Send a 'TOCEntry' notification for heading paragraphs.

            Paragraphs whose style is named 'Heading1' are reported at level 0
            and those named 'simhei' at level 1; every other flowable is
            ignored.

            Args:
                flowable: the flowable that was just handled by the document.
            """
            if flowable.__class__.__name__ == 'Paragraph':
                text = flowable.getPlainText()
                style = flowable.style.name
                if style == 'Heading1':
                    level = 0
                elif style == 'simhei':
                    level = 1
                else:
                    return
                E = [level, text, self.page]
                # If we have a bookmark name, append it to the notify data so
                # the TOC entry can point at it.
                bn = getattr(flowable, '_bookmarkName', None)
                if bn is not None:
                    E.append(bn)
                self.notify('TOCEntry', tuple(E))


    # this function makes our headings
    def doHeading(data, text, sty):
        """Append a bookmarked heading paragraph for ``text`` to ``data``.

        The bookmark name is the SHA-1 hex digest of the heading text; it is
        embedded in the paragraph as an anchor and also stored on the flowable
        as ``_bookmarkName``.

        Args:
            data: list of flowables; the new heading is appended to it.
            text: heading text (unicode text or a byte string).
            sty: paragraph style to use for the heading.
        """
        from hashlib import sha1

        # sha1() hashes bytes. Passing unicode text only works through an
        # implicit default-encoding conversion on Python 2 and raises
        # TypeError on Python 3, so encode explicitly as UTF-8.
        raw = text if isinstance(text, bytes) else text.encode('utf-8')
        # create bookmarkname
        bn = sha1(raw).hexdigest()
        # modify paragraph text to include an anchor point with name bn
        h = Paragraph(text + '<a name="%s"/>' % bn, sty)
        # store the bookmark name on the flowable so afterFlowable can see it
        h._bookmarkName = bn
        data.append(h)

    # Page Number
    def footer(canvas, doc):
        """Draw the current page number onto ``canvas``.

        Args:
            canvas: canvas of the page being drawn.
            doc: document template; its width and margins position the number.
        """
        canvas.saveState()
        label = Paragraph("%d" % canvas.getPageNumber(), styles['Normal'])
        # wrap() must run before drawOn(); its result is reused for placement.
        width, height = label.wrap(doc.width, doc.bottomMargin)
        label.drawOn(canvas, doc.leftMargin + width/2, height)
        canvas.restoreState()

    # load txt file
    def loadTxt(txt_path):
        """Return the lines of the file at ``txt_path`` as a list.

        Line endings are kept on each line; an empty file gives an empty list.
        """
        with open(txt_path, 'r') as handle:
            return handle.readlines()

    def toPDF(txt_datas, pdf_path):
        """Build a PDF with a table of contents from lines of text.

        Lines that start with u"第" and contain u"章" become headings (and
        therefore TOC entries); every other non-empty line becomes a body
        paragraph.

        Args:
            txt_datas: iterable of lines. Byte strings are decoded as GB2312
                with a GBK fallback; already-decoded text is used as is.
            pdf_path: output path handed to MyDocTemplate.

        Raises:
            UnicodeDecodeError: if a byte-string line is neither valid GB2312
                nor valid GBK.
        """
        PDF = MyDocTemplate(pdf_path, pagesize=A4)
        frame = Frame(PDF.leftMargin, PDF.bottomMargin, PDF.width, PDF.height,
                      id='normal')
        template = PageTemplate(frames=frame, onPage=footer)
        PDF.addPageTemplates([template])

        data = []

        # Table of contents, followed by a page break before the text.
        toc = TableOfContents()
        # Fonts and sizes for TOC levels 0 and 1.
        toc.levelStyles = [
            ParagraphStyle(fontName='simhei', fontSize=20, name='TOCHeading1',
                           leftIndent=20, firstLineIndent=-20, spaceBefore=10,
                           leading=16),
            ParagraphStyle(fontName='simhei', fontSize=18, name='TOCHeading2',
                           leftIndent=40, firstLineIndent=-20, spaceBefore=5,
                           leading=12),
        ]
        data.append(toc)
        data.append(PageBreak())

        NUM = 0
        for txt_data in txt_datas:
            txt_data = txt_data.lstrip()  # drop leading whitespace
            if not txt_data:  # nothing left on this line
                continue
            if isinstance(txt_data, bytes):
                # Catch only decode failures; a bare except would also hide
                # unrelated errors such as KeyboardInterrupt.
                try:
                    txt_data = txt_data.decode("gb2312")
                except UnicodeDecodeError:
                    txt_data = txt_data.decode("gbk")

            if txt_data[0] == u"第" and (u"章" in txt_data):
                doHeading(data, txt_data, styles['simhei'])
            else:
                data.append(Paragraph(txt_data, styles['STSONG']))
            NUM = NUM + 1
            print('{} line'.format(NUM))

        print('Build pdf!')
        # multiBuild (rather than build) is used because the document
        # contains a TableOfContents.
        PDF.multiBuild(data)

    if __name__ == "__main__":
        # Script entry point: convert the sample text file into a PDF with
        # the same base name.
        pdf_path = "财运天降.pdf".decode("utf8")
        txt_path = "财运天降.txt".decode("utf8")
        toPDF(loadTxt(txt_path), pdf_path)
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    本代码在windows和python2下进行测试,主要注意事项有:

    系统默认编码设置:
    import sys
    reload(sys)
    sys.setdefaultencoding('utf-8')
    1
    2
    3
    中文字体支持:
    pdfmetrics.registerFont(TTFont('STSONG', './STSONG.TTF')) #register Font
    pdfmetrics.registerFont(TTFont('simhei', './simhei.ttf')) #register Font
    styles = getSampleStyleSheet()
    styles.add(ParagraphStyle(fontName='STSONG', name='STSONG', leading=20, fontSize=12, firstLineIndent=22, wordWrap='CJK'))
    styles.add(ParagraphStyle(fontName='simhei', name='simhei', leading=25, fontSize=14, wordWrap='CJK')) # content Font
    1
    2
    3
    4
    5
    中文目录字体:
    toc.levelStyles = [
    ParagraphStyle(fontName='simhei', fontSize=20, name='TOCHeading1', leftIndent=20, firstLineIndent=-20, spaceBefore=10,
    leading=16),
    ParagraphStyle(fontName='simhei', fontSize=18, name='TOCHeading2', leftIndent=40, firstLineIndent=-20, spaceBefore=5, leading=12),
    ]
    1
    2
    3
    4
    5
    目录定位,这个需要根据你实际的txt文章进行定位修改
    if txt_data[0] == u"第" and (u"章" in txt_data):
    1
    中文解码,由于繁体中文不能解码为gb2312,因此使用try-except的方式
    try:
    txt_data = txt_data.decode("gb2312")
    except:
    txt_data = txt_data.decode("gbk")
    1
    2
    3
    4
    其效果如下:
    网上随便找了个txt文章:

    生成pdf目录:

    生成pdf内容:
    --------------------- 

  • 相关阅读:
    POJ 1320 Street Numbers 解佩尔方程
    数学分支(转)
    深入理解Java类加载器(1):Java类加载原理解析
    Java类加载器的工作原理
    深入理解Java:类加载机制及反射
    类加载机制:全盘负责和双亲委托
    java底层学习
    代码面试最常用的10大算法
    程序员面试金典算法题
    了解ASCII、gb系列、Unicode、UTF-8的区别
  • 原文地址:https://www.cnblogs.com/ly570/p/10995942.html
Copyright © 2011-2022 走看看