代码:
# -*- coding: utf-8 -*-
# Example code for reading text from .docx files
3 import docx
4 from win32com import client as wc
5 from pyhanlp import *
6 import time
7 import re
8 import eventlet#导入eventlet这个模块
9 import shutil
# Shared Word COM automation object, created once when the module is loaded
# and used by doSaveAas for every conversion.
word = wc.Dispatch("Word.Application")
# Convert a .doc file to .docx through Word automation.
def doSaveAas(doc_path, docx_path):
    """Open *doc_path* in Word and save a copy to *docx_path* as .docx.

    Args:
        doc_path: Path of the source document, handed to ``Documents.Open``.
        docx_path: Path the converted document is written to.

    The work runs inside ``eventlet.Timeout(10, False)``, so an expired
    timeout leaves the block silently instead of raising.  Errors raised by
    Word itself propagate to the caller.
    """
    wd_format_xml_document = 12  # WdSaveFormat value selecting the .docx format

    print("MMMMMMMMMMMMMMMMMMMMMMMMMMMMM")
    eventlet.monkey_patch()  # kept from the original, which marks this call as required
    # NOTE(review): eventlet timeouts fire only when the running code yields
    # cooperatively; a blocking COM call may not be interrupted -- confirm.
    with eventlet.Timeout(10, False):  # 10-second limit, no exception on expiry
        doc = word.Documents.Open(doc_path)
        try:
            doc.SaveAs(docx_path, wd_format_xml_document, False, "", True, "",
                       False, False, False, False)
        finally:
            # Close the document even when saving fails so it is not left
            # open in the shared Word instance.
            doc.Close()
    print("《《《《《《《《《《《《《《《《《《《《《《《《《《《《《")
# Single file: resolve the path to an absolute one and convert it when it is a .doc.
def File_doc2docx(file_path):
    """Convert one ``.doc`` file to ``.docx`` next to the original.

    The target path is the absolute source path with ``x`` appended.  Files
    whose extension is not exactly ``.doc`` are skipped, and the source file
    is left in place.

    Args:
        file_path: Relative or absolute path of the candidate file.

    Returns:
        None.  Failures are printed and not re-raised, so the call stays
        best-effort.
    """
    import os  # local import: ``os`` is not imported explicitly at the top of this file

    print("********************************************************************************************")
    try:
        print("文件路径:"+file_path)
        # NOTE(review): Word presumably resolves relative paths against its
        # own working directory, so pass it an absolute path -- confirm.
        abs_path = os.path.abspath(file_path)
        extension = os.path.splitext(abs_path)[1]
        print(extension)
        # The earlier check split on the regex ``.`` (any character), which
        # never produces "doc", so no file was ever converted.
        if extension == '.doc':
            doSaveAas(abs_path, abs_path + 'x')
    except Exception as exc:
        # Report the failure instead of printing an empty line.
        print("转换失败:", exc)
# Move files
def remove_file(Dir_path, To_dirpath):
    """Move every ``.docx`` entry directly inside *Dir_path* into *To_dirpath*.

    Despite the name, nothing is deleted: matching entries are relocated with
    ``shutil.move``.  Only names whose extension is exactly ``.docx`` are
    moved; sub-directories are not searched.

    Args:
        Dir_path: Directory to scan.
        To_dirpath: Destination directory; created when it does not exist.

    Raises:
        OSError: If *Dir_path* cannot be listed or *To_dirpath* cannot be
            created.  A failure to move an individual file is printed and
            the loop continues with the next entry.
    """
    import os  # local import: ``os`` is not imported explicitly at the top of this file

    entries = os.listdir(Dir_path)
    # Without the destination directory every move would fail.
    os.makedirs(To_dirpath, exist_ok=True)

    moved = 0  # number of files moved successfully so far
    for file_name in entries:
        print("********************************************************************************************")
        print("文件名:" + file_name)
        extension = os.path.splitext(file_name)[1]
        print("文件后缀:" + extension)
        if extension != '.docx':
            continue

        abs_file_path = os.path.abspath(os.path.join(Dir_path, file_name))
        abs_to_file_path = os.path.abspath(os.path.join(To_dirpath, file_name))
        try:
            shutil.move(abs_file_path, abs_to_file_path)
        except OSError as exc:
            # Keep going with the remaining files, but make the failure visible.
            print("移动失败:" + abs_file_path, exc)
            continue

        moved += 1
        print(moved, " 绝对路径:" + abs_file_path)
        print(moved, " 目标绝对路径:" + abs_to_file_path)
# Script entry point: convert one hard-coded sample document.
if __name__ == "__main__":
    # NOTE(review): this path looks like it has lost some separators -- confirm.
    sample_doc = r"E:Tomcatwebapps基于合同纠纷知识图谱构建及应用\upload原告.doc"
    File_doc2docx(sample_doc)