zoukankan      html  css  js  c++  java
  • .doc 2 .docx可用代码

      1 # # -*- coding:utf-8 -*-
      2 #读取docx中的文本代码示例
      3 import docx
      4 from win32com import client as wc
      5 from pyhanlp import *
      6 import time
      7 import eventlet#导入eventlet这个模块
      8 import shutil
      9 word = wc.Dispatch('Word.Application')
     10 '''重启计时器'''
     11 def restart(time_start,time_end,sub_deadline):
     12     #E:pycharmWorkPlaceGraduation_projectUtilfile_process.py
     13     time_sub = time_end - time_start
     14 
     15     i = 0
     16     print("NJNNNNNNNNN",time_sub,"NNNNNNNNNNNNNNN")
     17     if (time_sub > sub_deadline):
     18         str = "CHCP 65001" + "&&"
     19         str += "E:" + "&&"
     20         str += r"cd E:pycharmWorkPlaceGraduation_projectUtil" + "&&"
     21         str += "python file_process.py "
     22         # print(str)
     23         print("TIME______", i, "________", time_sub)
     24         i = i + 1
     25         cmd = os.system(str)
     26         # print(cmd)
     27 '''文件操作'''
     28 #将doc转成docx
     29 def doSaveAas(doc_path,docx_path):
     30     # time_start = time.time()
     31     print("MMMMMMMMMMMMMMMMMMMMMMMMMMMMM")
     32     eventlet.monkey_patch()#必须加这条代码
     33     with eventlet.Timeout(10, False):  # 设置超时时间为2秒
     34         # print("*$$$$$$$$$$$$$$$$")
     35         # time.sleep(2)
     36         # print("^^^^^^^^^^^^")
     37         doc = word.Documents.Open(doc_path)  # 目标路径下的文件
     38         # print("@@@@@@@@@@@@")
     39         doc.SaveAs(docx_path, 12, False, "", True, "", False, False, False, False)  # 转化后路径下的文件
     40         doc.Close()
     41         print("《《《《《《《《《《《《《《《《《《《《《《《《《《《《《")
     42         # time_end = time.time()
     43         # restart(time_start,time_end,20)
     44 
     45 #将相对路径转换乘绝对路径,同时调用转换文件进行转换,同时再顺便删除之前的文件
     46 def Dir_doc2docx(Dir_path):
     47     i=0
     48     for file_name in os.listdir(Dir_path):
     49 
     50         print("********************************************************************************************")
     51         try:
     52             print("文件名:"+file_name)
     53             file_path = os.path.join(Dir_path, file_name)
     54             print("文件后缀:"+os.path.splitext(file_name)[1] )
     55             if os.path.splitext(file_name)[1] == '.doc':
     56                 i = i + 1
     57                 abs_file_path=os.path.abspath(file_path)
     58                 print(i," 绝对路径:"+abs_file_path)
     59                 doSaveAas(abs_file_path,abs_file_path+'x')
     60                 os.remove(file_path)
     61 
     62         except:
     63             continue
     64 
     65 def Get_num_file_end(Dir_path,end):
     66     i=0
     67     for file_name in os.listdir(Dir_path):
     68         print("********************************************************************************************")
     69         try:
     70             if os.path.splitext(file_name)[1] == end:
     71                 i=i+1
     72         except:
     73             continue
     74     return i
     75 #获取文件值
     76 def Get_file_value(Dir_path,file_name):
     77     paragraph_id=[]
     78     paragraph_value=[]
     79     file_path = os.path.join(Dir_path, file_name)
     80     file = docx.Document(file_path)
     81     # 输出段落编号及段落内容
     82     for i in range(len(file.paragraphs)):
     83         paragraph_id.append(i)
     84         paragraph_value.append(file.paragraphs[i].text.strip().replace(u'u3000', u'').replace(u'xa0', u'').replace(' ', ''))
     85     return paragraph_id,paragraph_value
     86 #移动文件
     87 def remove_file(Dir_path,To_dirpath):
     88     i = 0
     89     for file_name in os.listdir(Dir_path):
     90         print("********************************************************************************************")
     91         try:
     92             print("文件名:" + file_name)
     93             file_path = os.path.join(Dir_path, file_name)
     94             print("文件后缀:" + os.path.splitext(file_name)[1])
     95             if os.path.splitext(file_name)[1] == '.docx':
     96                 i = i + 1
     97                 abs_file_path = os.path.abspath(file_path)
     98                 abs_to_file_path=os.path.abspath(os.path.join(To_dirpath, file_name))
     99                 shutil.move(abs_file_path,abs_to_file_path)
    100                 print(i, " 绝对路径:" + abs_file_path)
    101                 print(i, " 目标绝对路径:" + abs_to_file_path)
    102 
    103         except:
    104             continue
    105 if __name__ =="__main__":
    106     print("AAAAAAAAA")
    107     # a=Get_num_doc("D:ATESTjie")
    108     Dir_doc2docx("D:ATESTjie")
    109     # remove_file(r"D:ATESTjie", r"D:ATEST	ojie")
    110     print("LLLLLLL")
    111     # word.Quit()
  • 相关阅读:
    300万PV的ASP.NET网站使用阿里云的配置建议团队
    上周热点回顾(11.4-11.10)团队
    寻人启事:写得一手好代码的你在哪里?团队
    上周热点回顾(10.28-11.3)团队
    上周热点回顾(10.21-10.27)团队
    上周热点回顾(10.14-10.20)团队
    上周热点回顾(10.7-10.13)团队
    Elasticsearch之sense插件的安装(图文详解)
    Kibana里No Marvel Data Found问题解决(图文详解)
    Squirrel的安装(windows上Phoneix可视化工具)
  • 原文地址:https://www.cnblogs.com/smartisn/p/14408423.html
Copyright © 2011-2022 走看看