zoukankan      html  css  js  c++  java
  • PyPDF2切割单页为两页

    # -*- coding: UTF-8 -*-  
    from PyPDF2 import PdfFileReader, PdfFileWriter
    import os
    
    def _write_half_page(stream, page_index, keep_top, out_file_name):
        """Crop one page to its top or bottom half and save it as its own PDF.

        :param stream: open binary file object of the source PDF
        :param page_index: zero-based index of the page to crop
        :param keep_top: True keeps the upper half, False keeps the lower half
        :param out_file_name: path of the single-page PDF to write
        :return: None
        """
        # A fresh reader is built for every output file. The original author
        # notes that reusing a reader/page across writers raises an error, so
        # that behaviour is preserved here.
        reader = PdfFileReader(stream)
        page = reader.getPage(page_index)
        box = page.mediaBox

        # Use the real corners of the media box instead of assuming the page
        # origin is (0, 0); some PDFs have a shifted origin.
        left = float(box.getLowerLeft_x())
        bottom = float(box.getLowerLeft_y())
        right = float(box.getUpperRight_x())
        top = float(box.getUpperRight_y())
        middle = (bottom + top) / 2

        if keep_top:
            bottom = middle
        else:
            top = middle

        # Two opposite corners fully define the rectangle.
        box.lowerLeft = (left, bottom)
        box.upperRight = (right, top)

        writer = PdfFileWriter()
        writer.addPage(page)
        with open(out_file_name, 'wb') as outfile:
            writer.write(outfile)


    def split_pdf(infile, out_path):
        """Split every page of a PDF horizontally into two single-page PDFs.

        For page N (1-based) two files are written into ``out_path``:
        ``N_top.pdf`` (upper half) and ``N_bottom.pdf`` (lower half).

        :param infile: path of the PDF file to split
        :param out_path: directory that receives the generated PDF files;
            it is created when it does not exist
        :return: None
        """
        # An empty out_path means "current directory"; os.makedirs('') would fail.
        if out_path and not os.path.isdir(out_path):
            os.makedirs(out_path)

        # Keep the path parameter and the file handle under separate names.
        with open(infile, 'rb') as stream:
            number_of_pages = PdfFileReader(stream).getNumPages()

            for i in range(number_of_pages):
                # os.path.join keeps the files inside out_path whether or not
                # the caller supplied a trailing separator.
                top_name = os.path.join(out_path, str(i + 1) + '_top.pdf')
                _write_half_page(stream, i, True, top_name)

                bottom_name = os.path.join(out_path, str(i + 1) + '_bottom.pdf')
                _write_half_page(stream, i, False, bottom_name)
    
    
    if __name__ == '__main__':
        # Script entry point: split ./1.pdf and store the resulting
        # single-page files in the ./Single/ output folder.
        split_pdf('./1.pdf', './Single/')
  • 相关阅读:
    spark 读取mongodb失败,报executor time out 和GC overhead limit exceeded 异常
    在Zeppelin 使用spark sql 查询mongodb的数据
    Unable to query from Mongodb from Zeppelin using spark
    spark 与Zeppelin 版本兼容
    kafka 新旧消费者的区别
    kafka 新生产者发送消息流程
    spark ui acl 不生效的问题分析
    python中if __name__ == '__main__': 的解析
    深入C++的new
    NSSplitView
  • 原文地址:https://www.cnblogs.com/mysick/p/12702291.html
Copyright © 2011-2022 走看看