zoukankan      html  css  js  c++  java
  • python 将word另存为txt

     
    import os  
    import os.path
    from win32com import client as wc
    
    # Accumulates the full path of every file found under the root folders.
    c = []

    # Root folders to scan; this path is only used for experimenting.
    rootdir = ["d:/77"]
    
    
    def txt(j,c):
        """Save the Word document ``c[j]`` as text and delete the original.

        Args:
            j: Index into ``c`` of the document to convert.
            c: List of file paths. ``c[j]`` is expected to end in a
                five-character extension such as ``.docx``.

        The copy is written next to the source under the name
        ``<path without its last 5 characters>(translate txt)`` using save
        format 4. The source file is removed only when opening, saving and
        closing all succeeded; any exception raised by Word propagates to
        the caller after the clean-up below has run.
        """
        source = c[j]
        # Drop the trailing five characters (".docx") and tag the new name.
        newname = source[:-5] + "(translate txt)"

        word = wc.Dispatch('Word.Application')
        try:
            doc = word.Documents.Open(source)
            try:
                doc.SaveAs(newname, 4)
            finally:
                # Release the document even when SaveAs fails.
                doc.Close()
        finally:
            # Quit Word on every path so a failed conversion does not leave
            # the application instance started above running.
            word.Quit()

        # Reached only when nothing above raised, so the source is never
        # deleted without a saved copy.
        os.remove(source)

        print("完成")
    
    
    
    
                                     
    
    def wordt(c):
        """Call ``txt`` for every ``.docx`` path in ``c``.

        Args:
            c: List of file paths. Entries whose last five characters are
                not ``.docx`` (compared case-insensitively) are skipped.
        """
        for j, path in enumerate(c):
            # Lower-case the suffix so ".DOCX" / ".Docx" are matched as well.
            if path[-5:].lower() == ".docx":
                txt(j, c)
                            
            
            
    
    # Gather the full path of every file below each root folder.
    for root in rootdir:
        for parent, dirnames, filenames in os.walk(root):
            # os.walk yields bare file names; join each with its folder.
            c.extend(os.path.join(parent, name) for name in filenames)

    # Hand the collected paths to the .docx filter / converter.
    wordt(c)

    将docx另存为txt,并且删除源文件

    涉及到office中docx文档的打开与另存为命令

    相关参考

    # Reference example: open a document in Word through COM and save a copy
    # with format 2 (listed as wdFormatText in the constant table below).
    from win32com import client as wc
    word = wc.Dispatch('Word.Application')
    doc = word.Documents.Open('c:/test')
    # NOTE(review): the target ends in '.text' - presumably '.txt' was meant; confirm.
    doc.SaveAs('c:/test.text', 2)
    doc.Close()
    word.Quit()


    open(r'c:\text','r')
    # Save-format codes for the second argument of SaveAs, ordered by value.
    # Names that share a value are aliases and are assigned together.

    # Word document and template formats.
    wdFormatDocument = wdFormatDocument97 = 0
    wdFormatTemplate = wdFormatTemplate97 = 1

    # Plain-text and rich-text formats.
    wdFormatText = 2
    wdFormatTextLineBreaks = 3
    wdFormatDOSText = 4
    wdFormatDOSTextLineBreaks = 5
    wdFormatRTF = 6
    wdFormatEncodedText = wdFormatUnicodeText = 7

    # Web formats.
    wdFormatHTML = 8
    wdFormatWebArchive = 9
    wdFormatFilteredHTML = 10

    # XML-based document and template formats.
    wdFormatXML = 11
    wdFormatXMLDocument = 12
    wdFormatXMLDocumentMacroEnabled = 13
    wdFormatXMLTemplate = 14
    wdFormatXMLTemplateMacroEnabled = 15
    wdFormatDocumentDefault = 16

    # Fixed-layout export formats.
    wdFormatPDF = 17
    wdFormatXPS = 18

    # Flat XML formats.
    wdFormatFlatXML = 19
    wdFormatFlatXMLMacroEnabled = 20
    wdFormatFlatXMLTemplate = 21
    wdFormatFlatXMLTemplateMacroEnabled = 22

    over!
  • 相关阅读:
    PHP文件上传代码和逻辑详解
    了解thinkphp(二)
    了解ThinkPHP(一)
    php关于static关键字
    php关于return的关键字
    会话控制
    PDO数据库
    PHP包含文件函数include、include_once、require、require_once区别总结
    jQuery事件
    一、MVC模式学习概述
  • 原文地址:https://www.cnblogs.com/jjj-fly/p/7051235.html
Copyright © 2011-2022 走看看