zoukankan      html  css  js  c++  java
  • 写了一个自动用google翻译文档的工具

    写了一个自动用 Google 翻译文档的工具

    features:

    • [x] 支持 Word(.docx)文档
    • [x] 每一个段落下面放上对照的翻译
    from googletrans import Translator
    import sys
    import docx
    
    # Input document: the first command-line argument when given, otherwise the
    # author's hard-coded sample file.
    # NOTE(review): the default path contains no directory separators; they were
    # presumably lost when the snippet was published -- confirm and restore
    # before relying on the fallback.
    fname = sys.argv[1] if len(
        sys.argv) > 1 else r'F:GoogleDriveSync3jobrelatedThe Fast Forward MBA in Project Management ( PDFDrive.com ).full.docx'


    translator = Translator()
    foname = fname + '-cn.docx'
    # The file is opened twice: ``doc`` is only read, while ``docdes`` receives
    # the translations and is the document that gets saved.
    doc = docx.Document(fname)
    docdes = docx.Document(fname)

    # Look the paragraph lists up once instead of on every loop iteration.
    src_paragraphs = doc.paragraphs
    dst_paragraphs = docdes.paragraphs

    N = len(src_paragraphs)
    for i, paragraph in enumerate(src_paragraphs):
        # Progress as a fraction of paragraphs processed so far.
        print(1.0*i/N,)
        subCont = paragraph.text
        try:
            s = translator.translate(subCont, src='en', dest='zh-cn')
            # Append the translation to the same paragraph, set off by line
            # breaks, so it appears directly below the original text.
            dst_paragraphs[i].add_run('\n' + str(s.text) + '\n')
        except Exception as e:
            # Best effort: report the failure and leave this paragraph
            # untranslated rather than aborting the whole document.
            print('except:', e)

    docdes.save(foname)
    
    
    from googletrans import Translator
    import sys, os
    import docx
    
    # Path to process: the first command-line argument when one is supplied,
    # otherwise the built-in default below.
    if len(sys.argv) > 1:
        fname = sys.argv[1]
    else:
        fname = r'D:UserscutepDownloadsThrow-Away-the-First-90-Days.docx'
    
    def trans(fname):
        """Translate one .docx file from English to Simplified Chinese.

        Consecutive paragraphs are joined with a separator line into batches
        and each batch is sent to the translator in a single request. The
        translated text is split on the separator again and every piece is
        appended, as an extra run, to the paragraph it came from, so the
        original and its translation sit together.

        The finished document is saved as ``fname + '-cn.docx'``. While
        running, a snapshot named ``'<fname>-<target>-cn.docx'`` is saved each
        time the progress fraction exceeds the next 0.1-step target.

        Args:
            fname: Path (string) of the .docx file to translate.

        Raises:
            AssertionError: If a translated batch does not split into as many
                pieces as paragraphs were sent, i.e. the separator did not
                survive translation intact.
        """
        translator = Translator()
        foname = fname + '-cn.docx'
        # ``doc`` is only read; ``docdes`` receives the translations.
        doc = docx.Document(fname)
        docdes = docx.Document(fname)

        # Look the paragraph lists up once instead of on every loop iteration.
        src_paragraphs = doc.paragraphs
        dst_paragraphs = docdes.paragraphs

        # Separator placed between paragraphs inside one batch, and the bare
        # marker used to split the translated text back into paragraphs.
        spacer = '\n========================\n'
        spacer_short = '========================'

        N = len(src_paragraphs)
        NextTarget = 0.1
        i = 0
        while i < N:
            percentage = 1.0*i/N
            if i % 10 == 0:
                print(percentage)
            if percentage > NextTarget:
                # Save a partial result so progress survives a later failure.
                outputfile = '%s-%.2f-cn.docx' % (fname, NextTarget)
                print(outputfile)
                docdes.save(outputfile)
                NextTarget = NextTarget + 0.1

            # Grow the batch [i, j) until it reaches 4500 characters or the
            # document ends.
            subCont = src_paragraphs[i].text
            j = i+1
            while len(subCont) < 4500 and j < N:
                subCont = subCont + spacer + src_paragraphs[j].text
                j = j+1
            print(i, j)
            if subCont.strip():
                s = translator.translate(subCont, src='en', dest='zh-cn')
                ss = s.text.split(spacer_short)
                # Explicit raise instead of ``assert`` so the check is not
                # stripped under ``python -O``; a mismatch would attach
                # translations to the wrong paragraphs.
                if len(ss) != j-i:
                    raise AssertionError('%d, %d' % (len(ss), j-i))
                for k, piece in enumerate(ss):
                    dst_paragraphs[i+k].add_run('\n' + piece + '\n')
            i = j

        docdes.save(foname)
    
    if __name__ == '__main__':
        if os.path.isfile(fname):
            # A single document was given.
            trans(fname)
        else:
            # Otherwise fname is treated as a directory: every .docx directly
            # inside it is translated in its own worker process.
            # NOTE(review): files produced by earlier runs also end in
            # '.docx' and would be picked up again -- confirm whether they
            # should be skipped.
            from multiprocessing import Process

            ps = []
            for filename in os.listdir(fname):
                if filename.lower().endswith('.docx'):
                    # os.path.join supplies the platform's separator.
                    p = Process(target=trans,
                                args=(os.path.join(fname, filename),))
                    p.start()
                    ps.append(p)

            # Wait for every worker before exiting.
            for p in ps:
                p.join()
    
  • 相关阅读:
    表格标签
    图片标签
    超链接标签
    媒体标签
    实体标签
    html常用的标签
    头信息的作用
    【bzoj5017】[Snoi2017]炸弹 线段树优化建图+Tarjan+拓扑排序
    【bzoj3309】DZY Loves Math 莫比乌斯反演+线性筛
    【bzoj4010】[HNOI2015]菜肴制作 拓扑排序+堆
  • 原文地址:https://www.cnblogs.com/cutepig/p/13771965.html
Copyright © 2011-2022 走看看