----
#-*-coding:utf-8-*-
"""Scrape WooYun bug pages from a local mirror and load them into MySQL.

Each page is fetched from ``select.php?id=N``, a set of regular expressions
pulls the individual fields out of the HTML, and one row per page is inserted
into the ``alldata`` table.

The parsing helpers have no side effects and can be imported on their own;
the scrape itself only runs when the file is executed as a script.
"""
import re
import time
import urllib

try:  # Python 2 layout
    from urllib import unquote, urlopen
except ImportError:  # Python 3 layout
    from urllib.parse import unquote
    from urllib.request import urlopen

try:
    import MySQLdb
except ImportError:  # keep the parsing helpers importable without the driver
    MySQLdb = None


# Base URL of the mirrored bug pages; the numeric id is appended to it.
PAGE_URL = 'http://192.168.1.106/wooyun/select.php?id='

# Half-open id range walked by main(): FIRST_ID <= id < STOP_ID.
FIRST_ID = 53022
STOP_ID = 89250

# Pages of this many characters or fewer are treated as "no such bug".
MIN_PAGE_LENGTH = 100

# NOTE(review): credentials are hard-coded (root/root); consider moving them
# to configuration before running this anywhere shared.
DB_SETTINGS = dict(
    host='192.168.1.1',
    port=3306,
    user='root',
    passwd='root',
    db='wooyunTongji',
    charset='utf8',
)

INSERT_SQL = 'insert into alldata (id,title,BugNum,jiafang,oper,submittime,opentime,bugclass,level,bugrank,bugstate,oktime,okstyle,bugmark) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'

# Value used when a page carries no "ignore time" field.
DEFAULT_IGNORE_TIME = '1900-01-01 00:00:00'

# (search, replacement) pairs applied in order to every fetched page.
# NOTE(review): copied verbatim from this copy of the file, where every search
# string renders as one plain space (so the first pair already removes all
# spaces and the remaining pairs are no-ops). The upstream file may have
# targeted other whitespace characters such as NBSP or tabs -- confirm.
_CLEANUPS = (
    (' ', ''),
    (' ', ' '),
    (' ', ''),
    (' ', ''),
)


def _find_all(pattern, text):
    """Return every capture of *pattern* in *text* as a (possibly empty) list."""
    return re.findall(pattern, text)


def getHtml(url):
    """Fetch *url* and return its body with the cleanup replacements applied.

    The response is always closed, even when reading fails. On Python 3 the
    body arrives as bytes and is decoded as UTF-8 so the text patterns in this
    module can be applied to it; on Python 2 it is returned as read.

    Raises IOError (or a subclass) when the page cannot be fetched.
    """
    page = urlopen(url)
    try:
        html = page.read()
    finally:
        page.close()
    if not isinstance(html, str):
        html = html.decode('utf-8', 'replace')
    for old, new in _CLEANUPS:
        html = html.replace(old, new)
    return html


def gettitle(mylist):
    """Return the link texts of all bug links found in a listing page."""
    return _find_all(r'<a href="/bugs/wooyun-.+">(.*?)</a></td>', mylist)


def getoper(html):
    """Return the first whitehat name linked from the page.

    Raises IndexError when the page contains no whitehat link.
    """
    return _find_all(r'/whitehats/(.*?)">', html)[0]


#-------------------------------------------------
# Field extractors for a single bug page. Unless stated otherwise each one
# returns the list of all matches, which is empty when the field is absent.

def GetTitle(html):
    """Return the bug title matches."""
    return _find_all(r"漏洞标题:(.*?)</h3>", html)


def BugNum(html):
    """Return the bug number matches taken from the original-source link."""
    return _find_all(r'http://wooyun.org/bugs/(.*?)">查看原始来源', html)


def JiaFang(html):
    """Return the vendor identifiers taken from the corps links."""
    return _find_all(r'http://www.wooyun.org/corps/(.*?)">', html)


def SubmitTime(html):
    """Return the submit time matches."""
    return _find_all(r"提交时间:(.*?)</h3>", html)


def OpenTime(html):
    """Return the disclosure time matches."""
    return _find_all(r"公开时间:(.*?)</h3>", html)


def BugClass(html):
    """Return the bug type matches."""
    return _find_all(r"漏洞类型:(.*?)</h3>", html)


def level(html):
    """Return the severity level matches."""
    return _find_all(r"危害等级:(.*?)</h3>", html)


def BugState(html):
    """Return the first bug state match, stripped of surrounding whitespace.

    Raises IndexError when the page has no bug state field.
    """
    return _find_all(r"漏洞状态:(.*?)</h3>", html)[0].strip()


def BugSave(html):
    """Return the collection counter matches."""
    return _find_all(r'<a id="collection_num">(.*?)</a>人收藏', html)


def OkTime(html):
    """Return the confirmation time matches."""
    return _find_all(r"确认时间:(.*?)</p>", html)


def Bugrank(html):
    """Return the rank matches."""
    return _find_all(r"漏洞Rank:(.*?)</p>", html)


def BugMark(html):
    """Return the tag matches."""
    return _find_all(r"Tags标签:(.*?)</h3>", html)


def ignoreTime(html):
    """Return the ignore time matches, or a one-element default list.

    The default is wrapped in a list so that callers can index ``[0]`` in
    both cases; returning the bare string made ``[0]`` yield only its first
    character.
    """
    found = _find_all(r"忽略时间:(.*?)</p>", html)
    return found if found else [DEFAULT_IGNORE_TIME]


def Bugeye(html):
    """Return the first attention counter match, stripped.

    Raises IndexError when the page has no attention counter.
    """
    return _find_all(r'<span id="attention_num">(.*)</span>', html)[0].strip()


def _parse_record(html):
    """Return the 13 column values for one page, in insert order (no id).

    Raises IndexError when a mandatory field is missing from the page.
    """
    confirmed = OkTime(html)
    if confirmed:
        whotime, whostyle = confirmed[0].strip(), '确认'
    else:
        whotime, whostyle = ignoreTime(html)[0].strip(), '忽略'

    ranks = Bugrank(html)
    rank = ranks[0] if ranks else '0'

    return (
        GetTitle(html)[0].strip(),
        BugNum(html)[0].strip(),
        unquote(JiaFang(html)[0].strip()),
        unquote(getoper(html)),
        SubmitTime(html)[0].strip(),
        OpenTime(html)[0].strip(),
        BugClass(html)[0].strip(),
        level(html)[0],
        rank,
        BugState(html),
        whotime,
        whostyle,
        BugMark(html)[0].strip(),
    )


def main(first_id=FIRST_ID, stop_id=STOP_ID):
    """Walk the id range, printing and inserting one row per usable page.

    Row ids are a running counter starting at 1. Pages that cannot be fetched
    are reported with ``error`` and skipped instead of re-using the previous
    page. The connection is closed even if a page fails to parse.
    """
    if MySQLdb is None:
        raise ImportError('the MySQLdb driver is required to run the scrape')

    conn = MySQLdb.connect(**DB_SETTINGS)
    mark = 0
    try:
        for i in range(first_id, stop_id):
            try:
                html = getHtml(PAGE_URL + str(i))
            except IOError:
                print('error')
                continue
            if len(html) <= MIN_PAGE_LENGTH:
                continue

            # Parse once and reuse the values for both the log line and the row.
            record = _parse_record(html)
            print(' '.join(record))

            mark += 1
            cur = conn.cursor()
            try:
                cur.execute(INSERT_SQL, (mark,) + record)
            finally:
                cur.close()
            conn.commit()
            print(mark)
    finally:
        conn.close()
    print('Over!')


if __name__ == '__main__':
    main()