zoukankan      html  css  js  c++  java
  • 利用python,生成word

    依赖包如下所示:

    pip install python-docx==0.8.10
    pip install lxml==4.6.3

    代码如下所示:

    import datetime
    import os
    import time
    import uuid
    
    import docx
    import json
    import lxml
    import mysql.connector
    import requests
    import subprocess
    import sys
    from docx import Document
    from docx import shared
    from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
    from docx.oxml import OxmlElement
    from docx.oxml.ns import qn
    
    import Baidu_Text
    
    # Add the current working directory to the module search path.
    # NOTE(review): this runs after `import Baidu_Text` above, so it cannot
    # help that import resolve -- confirm whether it should be moved earlier.
    sys.path.append(os.getcwd())
    
    
    class report:
        """Convenience wrapper that appends report content to a python-docx document.

        The wrapped document is stored on ``self.doc``; every helper below
        appends to the end of that document.
        """

        def __init__(self, doc):
            """Keep a reference to the document (e.g. ``docx.Document()``) to write into."""
            self.doc = doc

        def setHeading(self, lv, s):
            """Append a centred heading with text ``s`` at heading level ``lv``."""
            heading = self.doc.add_heading(s, lv)
            heading.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

        def setParagraph(self, s, alig):
            """Append a paragraph containing ``s``.

            ``alig`` selects the alignment: ``'R'`` for right, ``'C'`` for
            centre; any other value leaves the default alignment untouched.
            """
            p = self.doc.add_paragraph(s)
            if alig == 'R':
                p.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
            elif alig == 'C':
                p.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

        def addPicture(self, path):
            """Append the image at ``path`` in its own centred paragraph, 3 inches wide."""
            # shared.Inches(1) sizes in inches, shared.Cm(2) sizes in centimetres.
            paragraph = self.doc.add_paragraph()
            # Centre the picture by centring the paragraph that holds it.
            paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
            run = paragraph.add_run("")
            run.add_picture(path, width=shared.Inches(3))

        def setTOC(self):
            """Append a table-of-contents field followed by a page break.

            Only the field code is written; the placeholder text is shown
            until the field is updated in the word processor.
            """
            paragraph = self.doc.add_paragraph()
            run = paragraph.add_run()

            fld_begin = OxmlElement('w:fldChar')
            fld_begin.set(qn('w:fldCharType'), 'begin')

            instr_text = OxmlElement('w:instrText')
            instr_text.set(qn('xml:space'), 'preserve')
            # Raw string: the field switches contain literal backslashes, and
            # a bare "\u" in a normal string literal is a syntax error.
            # Change "1-4" depending on the heading levels you need.
            instr_text.text = r'TOC \o "1-4" \h \z \u'

            fld_separate = OxmlElement('w:fldChar')
            fld_separate.set(qn('w:fldCharType'), 'separate')
            placeholder = OxmlElement('w:t')
            # Placeholder text, Chinese for "Right-click to update field."
            placeholder.text = "右键单击以更新字段。"
            fld_separate.append(placeholder)

            fld_end = OxmlElement('w:fldChar')
            fld_end.set(qn('w:fldCharType'), 'end')

            r_element = run._r
            for child in (fld_begin, instr_text, fld_separate, fld_end):
                r_element.append(child)

            # Page break so the report body starts after the table of contents.
            paragraph.add_run().add_break(docx.enum.text.WD_BREAK.PAGE)

        def writeDoc(self, fileName):
            """Save the wrapped document to ``fileName``."""
            self.doc.save(fileName)

        def set_updatefields_true(self, docx_path):
            """Set ``w:updateFields`` to true in the settings of the file at ``docx_path``.

            The file is re-opened from disk and saved back in place; the
            in-memory ``self.doc`` is not touched.
            """
            namespace = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
            # Close the handle before saving back to the same path.
            with open(docx_path, 'rb') as f:
                doc = Document(f)
            settings = doc.settings.element
            # Reuse an existing element so repeated calls do not add duplicates.
            element_updatefields = settings.find(f"{namespace}updateFields")
            if element_updatefields is None:
                element_updatefields = lxml.etree.SubElement(
                    settings, f"{namespace}updateFields"
                )
            element_updatefields.set(f"{namespace}val", "true")
            doc.save(docx_path)
    
    
    def translation(_lang, _con):
        """Translate English text through ``Baidu_Text.baiduTrans``.

        Args:
            _lang: target language code; ``"en"`` is replaced by ``"zh"``.
            _con: English source text.

        Returns:
            dict with ``"code"`` (0 on success, otherwise the ``error_code``
            returned by the translation call) and ``"translation"`` (the
            translated lines joined by newlines, without leading or trailing
            newlines). When an error occurs part-way through a long text, the
            lines translated so far are returned together with the error code.
            Blank input yields code 0 and an empty translation without any
            request being made.
        """
        if _lang == "en":
            _lang = "zh"
        _con = _con.strip()
        print(len(_con))

        # Nothing but spaces: no request needed.
        if not _con.replace(" ", ""):
            return {"code": 0, "translation": ""}

        if len(_con) >= 5000:
            # Long text is sent one line at a time; blank lines are skipped.
            segments = [seg for seg in _con.split("\n") if seg.replace(" ", "")]
        else:
            segments = [_con]

        code = 0
        translated = []
        for segment in segments:
            time.sleep(1)  # pause between consecutive requests
            rsp = Baidu_Text.baiduTrans("en", _lang, segment)
            if rsp.get("error_code") is not None:
                # Stop at the first failure and report what was translated so far.
                code = rsp.get("error_code")
                break
            translated.extend(item.get("dst") for item in rsp.get("trans_result"))

        result = {
            "code": code,
            "translation": "\n".join(translated).strip("\n")
        }
        print(result)
        return result
    
    
    def javaTOC(doc):
        """Run ``toc.jar`` (located next to the script) on the document ``doc``.

        Args:
            doc: path of the .docx file. A relative path is resolved against
                ``sys.path[0]``; an absolute path is used unchanged.

        The call blocks until the Java process exits, so the file is no
        longer being written by it when the caller touches it next. Failure
        to start the process is printed rather than raised.
        """
        base = sys.path[0]
        # os.path.join keeps an absolute ``doc`` as is instead of prefixing
        # the script directory a second time.
        doc = os.path.join(base, doc)
        command = ['java', '-jar', os.path.join(base, 'toc.jar'), doc]
        print(' '.join(command))
        try:
            # Argument list (no shell) so paths with spaces or shell
            # metacharacters are passed through verbatim; run() drains the
            # pipe and waits for completion.
            subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        except OSError as err:
            # e.g. the java executable is not installed
            print(err)
    
    
    def entry(lalala):
        """Generate the bilingual news report and save it under ``report/``.

        Args:
            lalala: iterable of dicts, each with a ``"website"`` name and a
                ``"list"`` of news items; every item provides ``"title"``,
                ``"publish_time"`` and ``"content"``.

        For each website with at least one item a level-1 heading is written,
        followed per item by the original title, publish time and content and
        then their translations. The file is written to
        ``<script dir>/report/news<timestamp>.docx``.

        Returns:
            ``{"code": 1, ...}`` when at least one item was written,
            ``{"code": 2, ...}`` when there was nothing to report.
        """
        print(lalala)
        doc = report(docx.Document())
        doc.setTOC()

        now = datetime.datetime.now()
        # Format only ASCII directives with strftime and append the Chinese
        # date/hour markers separately.
        curr_time = now.strftime("%Y-%m-%d") + '日' + now.strftime("%H") + '时推送'
        relative_name = 'report/news{}.docx'.format(curr_time)
        fileName = sys.path[0] + '/' + relative_name
        # Saving fails if the target directory does not exist yet.
        os.makedirs(os.path.dirname(fileName), exist_ok=True)

        item_count = 0
        for lala in lalala:
            items = lala["list"]
            print(type(items))
            if not len(items):
                continue
            item_count += len(items)
            doc.setHeading(1, lala["website"])
            for obj in items:
                title = obj["title"]
                auth_time = str(obj["publish_time"])
                content = str(obj["content"])
                print(title)

                # Original-language section.
                doc.setHeading(2, title)
                doc.setParagraph(auth_time, 'C')
                doc.setParagraph(content, '')

                # Translated section.
                chn_title = translation("en", title)["translation"]
                doc.setHeading(2, chn_title)
                doc.setParagraph(auth_time, 'C')
                chn_content = translation("en", content)["translation"]
                doc.setParagraph(chn_content, '')

        doc.writeDoc(fileName)
        # javaTOC resolves its argument against sys.path[0], so hand it the
        # path relative to that directory rather than the absolute one.
        javaTOC(relative_name)
        doc.set_updatefields_true(fileName)

        if item_count:
            result = {
                "code": 1,
                "message": "生成成功"
            }
        else:
            result = {
                "code": 2,
                "message": "没有最新消息"
            }
        print(result)
        return result
    
    
    # Arguments passed in when this script is launched as a child process
    print(sys.argv)
    
    # Script entry point: reads five positional command-line arguments and
    # forwards them to entry().
    # NOTE(review): entry() as defined in this file takes a single argument
    # (a list of {"website", "list"} dicts), so the five-argument call below
    # raises TypeError -- confirm how the news list is meant to be built from
    # these arguments.
    if __name__ == '__main__':
        # # _from, _to, lang, name, type
        # _from = '2021-03-16'
        # _to = '2021-03-20'
        # lang = 1  # _lang =  0 bilingual, 1 original language, 2 translation
        # # name = "repost"
        # # type = "weekly report"
        # _uuid = '4b89d9f8-69be-11eb-914c-40ec996f89c9'
        # entry(_from, _to, lang, _uuid)
        from_date = sys.argv[1]
        _to_date = sys.argv[2]
        _lang = sys.argv[3]
        _uuid = sys.argv[4]
        _sid = sys.argv[5]
        entry(from_date, _to_date, _lang, _uuid, _sid)

    上面代码,加了翻译,如果不需要翻译,可将有关翻译的代码全部去掉。

    Baidu_Text.py代码:

    # -*- coding: utf-8 -*-
    
    # This code shows an example of text translation from English to Simplified-Chinese.
    # This code runs on Python 2.7.x and Python 3.x.
    # You may install `requests` to run.py this code: pip install requests
    # Please refer to `https://api.fanyi.baidu.com/doc/21` for complete api document
    
    import requests
    import random
    import json
    from hashlib import md5
    
    # Credentials for the translation API -- replace both placeholders.
    appid = 'xxxxxx'   # put your own appid here
    appkey = 'xxxxxx'  # put your own appkey here

    # # Language codes are listed at `https://api.fanyi.baidu.com/doc/21`
    # from_lang = 'en'
    # to_lang = 'zh'

    # Full request URL = host + translate path.
    endpoint = 'http://api.fanyi.baidu.com'
    path = '/api/trans/vip/translate'
    url = ''.join((endpoint, path))
    
    
    # Generate salt and sign
    def make_md5(s, encoding='utf-8'):
        """Return the hexadecimal MD5 digest of ``s`` encoded with ``encoding``."""
        digest = md5()
        digest.update(s.encode(encoding))
        return digest.hexdigest()
    
    
    def baiduTrans(from_lang, to_lang, query, timeout=10):
        """Send ``query`` to the Baidu translate endpoint and return the parsed reply.

        Args:
            from_lang: source language code (e.g. ``'en'``).
            to_lang: target language code (e.g. ``'zh'``).
            query: text to translate.
            timeout: seconds to wait for the HTTP request before giving up.

        Returns:
            The decoded JSON response as a dict. If the request or the JSON
            decoding fails, ``{"error_code": 500, "err": <exception>}`` is
            returned instead of raising.
        """
        # Salt and signature: md5(appid + query + salt + appkey).
        salt = random.randint(32768, 65536)
        sign = make_md5(appid + query + str(salt) + appkey)

        # Build request
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
        payload = {'appid': appid, 'q': query, 'from': from_lang, 'to': to_lang, 'salt': salt, 'sign': sign}

        # Send request
        print(payload)
        try:
            # Send the form fields in the POST body (matching the declared
            # content type) rather than in the URL, so long texts are not
            # limited by URL length; the timeout keeps a stalled connection
            # from blocking forever.
            r = requests.post(url, data=payload, headers=headers, timeout=timeout)
            result = r.json()
        except Exception as err:
            # Best effort: report any failure through the same dict shape
            # callers already check via "error_code".
            result = {
                "error_code": 500,
                "err": err
            }
        return result
    
    
    # Manual smoke test: translate a fixed English sentence to Chinese.
    if __name__ == '__main__':
        from_lang = 'en'
        to_lang = 'zh'
        query = 'Hello World! This is 1st paragraph.This is 2nd paragraph.'
        aa = baiduTrans(from_lang, to_lang, query)
        print(aa.get("error_code"))
        # "trans_result" is absent when the call failed, so only index into
        # it when it is present.
        trans_result = aa.get("trans_result")
        if trans_result:
            print(trans_result[0].get("dst"))
  • 相关阅读:
    CSS_行内元素和块级元素
    jdbc连接oracle11g的问题——查不出来数据,权限问题
    新的起点
    MVC过滤器详解
    SQL Server游标的使用
    处理百万级以上的数据提高查询速度的方法
    两个有序数组找出相同数据
    C# 可变参数
    C#反射
    产生一个int数组,长度为100,并向其中随机插入1-100,并且不能重复。
  • 原文地址:https://www.cnblogs.com/lxz123/p/15503925.html
Copyright © 2011-2022 走看看