#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""Filter exported HTML request forms and copy the matching ones aside.

Every file under ``WORK_DIR`` is parsed as HTML.  A file is copied to
``TARGET_DIR`` when all of the following hold:

* ``IsComputer`` -- the ``资源类型`` input has the value ``主机``;
* ``IsAgree``    -- a four-cell table row labelled ``帐号管理人员处理`` carries
  the opinion ``同意``;
* ``IsIntersect`` or ``IsIntersect2`` -- one of the two host-list inputs names
  at least one host contained in ``target_hosts``.
"""

import operator  # noqa: F401  (unused here; kept so the file-level import set is unchanged)
import os
import re
import shutil

from bs4 import BeautifulSoup

# Host names of interest.  Defined at module level so IsIntersect/IsIntersect2
# also work when this file is imported rather than executed as a script.
target_hosts = {
    'cmszsoaa', 'cmszsoab', 'cmszdcss', 'cmszicss', 'cmsznpsa', 'cmsznpsb',
    'cmszinta', 'cmszintb', 'cmszdpsa', 'cmszdpsb', 'mcbsoaa', 'mcbsoab',
    'mcbinta', 'mcbintb', 'mcbdpsa', 'mcbdpsb', 'mcbnpsa', 'mcbnpsb',
    'mcbdcss', 'mcbicss', 'newdcss', 'newicss',
}

WORK_DIR = '/root/XmlOut/'
TARGET_DIR = '/root/AccountOut/'

# Compiled once: separators between entries of a host-list field, and the test
# that decides whether a token is kept as a host name candidate.
_HOST_SEPARATORS = re.compile(r'[、: ]')
_HAS_LOWERCASE_LETTER = re.compile(r'[a-z]')


def processhtml(item):
    """Parse the HTML file at path *item* and return the BeautifulSoup tree."""
    # Binary mode: let BeautifulSoup detect the document encoding itself
    # instead of decoding with whatever the process locale happens to be.
    with open(item, 'rb') as fp:
        return BeautifulSoup(fp, "html.parser")


def IsComputer(soup_arg):
    """Return True when the ``资源类型`` input of the form has value ``主机``.

    The detected resource type is printed.  When the input element is absent
    nothing is printed and False is returned.
    """
    tag = soup_arg.find('input', {'name': '资源类型'})
    if tag is None:
        return False

    value = tag.get('value')
    if value == '主机':
        print('资源类型:主机')
        return True
    if value == '数据库':
        print('资源类型:数据库')
    else:
        print('资源类型:其他')
    return False


def IsAgree(soup_arg):
    """Return True when the ``帐号管理人员处理`` row carries the opinion ``同意``.

    Only ``<tr>`` rows with exactly four ``<td>`` cells whose second cell
    contains a ``<font>`` element are considered.  The opinion text of every
    row with the expected label is printed.
    """
    target = ['帐号管理人员处理']
    result = False
    for row in soup_arg.find_all('tr'):
        cells = row.find_all('td')
        if len(cells) != 4:
            continue
        opinion = cells[1].find('font')
        if opinion is None:
            continue
        label = cells[0].string
        if label is None:
            # .string is None when the cell does not hold a single text node;
            # such a cell cannot be the label we are looking for.
            continue
        words = label.split()
        if words != target:
            continue
        print(opinion.string)
        if opinion.string == '同意':
            print("满足条件为:%s && 审批意见(同意)" % words[0])
            result = True
    return result


def _hosts_intersect(soup, field_name, hosts, echo=False):
    """Report whether the host list in input *field_name* overlaps *hosts*.

    The input's ``value`` is split on ``、``, ``:`` and space; tokens containing
    at least one lowercase ASCII letter are treated as host names.  With
    *echo* set, each such token is printed.  Returns False without printing
    when the input element or its ``value`` attribute is missing.
    """
    tag = soup.find('input', {'name': field_name})
    value = tag.get('value') if tag is not None else None
    if value is None:
        return False

    candidates = [
        token for token in _HOST_SEPARATORS.split(value)
        if _HAS_LOWERCASE_LETTER.search(token)
    ]
    if echo:
        for token in candidates:
            print(token)

    if set(hosts).intersection(candidates):
        print('非空,有交集')
        return True
    print("空,无交集")
    return False


def IsIntersect(soup_arg, hosts=None):
    """Return True when the ``239385_资源名称`` input names a host in *hosts*.

    *hosts* defaults to the module-level ``target_hosts``.  Each host name
    candidate found in the field is printed.
    """
    if hosts is None:
        hosts = target_hosts
    return _hosts_intersect(soup_arg, '239385_资源名称', hosts, echo=True)


def IsIntersect2(soup_arg, hosts=None):
    """Return True when the ``所在的硬件设备/软件平台`` input names a host in *hosts*.

    *hosts* defaults to the module-level ``target_hosts``.
    """
    if hosts is None:
        hosts = target_hosts
    return _hosts_intersect(soup_arg, '所在的硬件设备/软件平台', hosts)


def main():
    """Walk ``WORK_DIR`` and copy every matching form into ``TARGET_DIR``."""
    # shutil.copy fails if the destination directory does not exist yet.
    os.makedirs(TARGET_DIR, exist_ok=True)

    for parent, _dirnames, filenames in os.walk(WORK_DIR, followlinks=True):
        for filename in filenames:
            file_path = os.path.join(parent, filename)
            print("filename with full path: %s" % file_path)
            soup = processhtml(file_path)
            # All four checks are always run: each one prints diagnostics.
            flag1 = IsComputer(soup)
            flag2 = IsAgree(soup)
            flag3 = IsIntersect(soup)
            flag4 = IsIntersect2(soup)
            if flag1 and flag2 and (flag3 or flag4):
                print('%s, ok----' % (file_path))
                shutil.copy(file_path, TARGET_DIR)


if __name__ == '__main__':
    main()