zoukankan      html  css  js  c++  java
  • 用python导出mht文件中的JPG图片

    import os
    import sys
    import re
    import time
    import base64
    
    def extract_data(filename, dir):
    	"""Extract every base64-encoded JPEG found in an MHT file.

    	The file is scanned for runs of text that start with ``/9j`` (the
    	base64 spelling of the JPEG magic bytes ``FF D8 FF``) and extend up to
    	the next blank line.  Each run is decoded and written to
    	``<dir>/<n>.jpg``, where ``n`` counts from 1 in order of appearance.
    	The path of every image is printed before it is written.

    	Args:
    		filename: Path of the MHT file to read.
    		dir: Existing directory that receives the numbered ``.jpg`` files.

    	Raises:
    		OSError: If the input cannot be read or an output cannot be written.
    		binascii.Error: If a matched run cannot be decoded as base64.
    	"""
    	# latin-1 maps every byte to a character, so stray 8-bit bytes in other
    	# MIME parts cannot abort the read; the base64 payload itself is ASCII.
    	with open(filename, "r", encoding="latin-1") as source:
    		content = source.read()

    	blobs = re.finditer(r"/9j.*?(?=\n\n)", content, re.DOTALL)
    	for index, match in enumerate(blobs, 1):
    		target = "%s/%s.jpg" % (dir, index)
    		print(target)

    		# Drop line breaks and any existing padding, then rebuild the padding
    		# from the actual payload length.
    		encoded = "".join(match.group().split()).rstrip("=")
    		remainder = len(encoded) % 4
    		if remainder == 1:
    			# A lone trailing character carries only 6 bits and can never
    			# form a byte; discard it instead of failing on truncated data.
    			encoded = encoded[:-1]
    		elif remainder:
    			encoded += "=" * (4 - remainder)

    		# Decode before opening so a bad blob does not leave an empty file.
    		data = base64.b64decode(encoded)
    		with open(target, "wb") as image:
    			image.write(data)
    	
    def export_file(file_path):
    	"""Extract the JPEG images of an MHT file into a directory beside it.

    	The output directory sits in the same location as ``file_path`` and is
    	named after the part of the file name that precedes its first dot
    	(``E:\\Scripts\\test.mht`` -> ``E:\\Scripts\\test``).  When that would
    	give no usable name (no dot at all, or a name starting with a dot), the
    	full file name plus ``_dir`` is used so the directory cannot collide
    	with the file itself or with its parent directory.  The directory is
    	created if missing; otherwise a notice is printed and it is reused.

    	Args:
    		file_path: Path of the MHT file.  Both ``\\`` and ``/`` are accepted
    			as separators regardless of the host operating system.

    	Raises:
    		OSError: If the output directory cannot be created.
    	"""
    	# Split at the LAST separator using plain string operations, so regex
    	# metacharacters in the name and repeated path components are harmless.
    	cut = max(file_path.rfind("\\"), file_path.rfind("/")) + 1
    	path = file_path[:cut]
    	file_name_ext = file_path[cut:]

    	file_name = file_name_ext.partition(".")[0]
    	if "." not in file_name_ext or not file_name:
    		file_name = file_name_ext + "_dir"

    	full_path = path + file_name
    	if os.path.exists(full_path):
    		print("The Directory \"%s\" Exists!\n" % full_path)
    	else:
    		os.mkdir(full_path)
    	extract_data(file_path, full_path)
    	
    def main():
    	"""Print the OS name, then export the file named by the first argument.

    	Only the first command-line argument is used; any further arguments
    	are ignored.  When the script is started without arguments, the
    	built-in sample path is exported instead.
    	"""
    	print(os.name)

    	arg_count = len(sys.argv)
    	if arg_count >= 2:
    		export_file(sys.argv[1])
    	elif arg_count == 1:
    		# No argument given: fall back to the hard-coded sample file.
    		export_file("E:\\Scripts\\test.mht")
    		
    # Script entry point: run the exporter, announce the pause, then sleep for
    # two seconds before exiting (presumably so the output stays readable when
    # the script is launched in a pop-up console window -- confirm).
    if __name__ == "__main__":
    	main()
    	print("just wait for 2 seconds!\n")
    	time.sleep(2)
    	
  • 相关阅读:
    关于cookie的一点知识
    一点简单的关于ASP.NET下载
    一个小小小问题
    复习linq
    拾遗一些关于存储过程
    一般处理程序(下)
    复习一下递归
    复习JS和jQuery
    一点关于Ajax和一个等待图标的显示
    C#开发BIMFACE系列14 服务端API之批量获取转换状态详情
  • 原文地址:https://www.cnblogs.com/LinuxHunter/p/1915311.html
Copyright © 2011-2022 走看看