zoukankan      html  css  js  c++  java
  • wooyun本地数据抓取

    ----

    #-*-coding:utf-8-*-
    import re
    import urllib
    import MySQLdb
    import time
    from urllib import unquote
    
    
    def getHtml(url):
        """Download ``url`` and return its body collapsed onto a single line.

        The body is read in full, newline characters are removed and the
        fixed-width space runs below are collapsed, so that the single-line
        regular expressions used by the extractor functions can match text
        that was spread over several lines in the original page.

        :param url: address of the page to fetch.
        :return: the page body as a string with the replacements applied.
        :raises IOError: propagated from ``urllib.urlopen`` when the request
            cannot be made.
        """
        page = urllib.urlopen(url)
        try:
            html = page.read()
        finally:
            # Always release the connection, even if the read fails.
            page.close()
        # The original literal here was split over two source lines (a raw
        # line break inside the quotes); it is restored as an escaped newline.
        html = html.replace('\n', '')
        # NOTE(review): the space-run literals below are kept exactly as
        # found; they may originally have been tab characters -- confirm.
        html = html.replace('       ', ' ')
        html = html.replace('   ', '')
        html = html.replace('   ', '')
        return html
    
    
    def gettitle(mylist):
        """Return the link text of every wooyun bug anchor in ``mylist``.

        An anchor is recognised as ``<a href="/bugs/wooyun-...">text</a></td>``.
        The href part is matched non-greedily: the page text is typically one
        long line, and a greedy match would run from the first anchor to the
        last one and report only the final title.

        :param mylist: HTML text of a listing page.
        :return: list of anchor texts in document order (empty if none match).
        """
        title_pattern = re.compile(r'<a href="/bugs/wooyun-.+?">(.*?)</a></td>')
        return title_pattern.findall(mylist)
    
    
    def getoper(html):
        """Return the first ``/whitehats/<name>`` link target found in ``html``.

        :param html: HTML text of a bug detail page.
        :return: the text between ``/whitehats/`` and the closing ``">`` of
            the first such link.
        :raises IndexError: if the page contains no such link.
        """
        whitehat_names = re.findall(r'/whitehats/(.*?)">', html)
        return whitehat_names[0]
    #-------------------------------------------------
    
    
    def GetTitle(html):
        """Extract the vulnerability title field(s) from a bug detail page.

        :param html: HTML text of the page.
        :return: list with the raw (unstripped) text found between the title
            label and the following ``</h3>``; empty if the label is absent.
        """
        title_pattern = re.compile(r"漏洞标题:(.*?)</h3>")
        return title_pattern.findall(html)
    
    def BugNum(html):
        """Extract the bug identifier from the "view original source" link.

        :param html: HTML text of the page.
        :return: list of the path segments that follow
            ``http://wooyun.org/bugs/`` in links labelled with the original
            source text; empty if no such link exists.
        """
        return [
            found.group(1)
            for found in re.finditer(r'http://wooyun.org/bugs/(.*?)">查看原始来源', html)
        ]
    
    def JiaFang(html):
        """Extract the vendor name(s) from ``/corps/`` links in the page.

        :param html: HTML text of the page.
        :return: list of the (still URL-quoted) texts following
            ``http://www.wooyun.org/corps/`` up to the closing ``">``.
        """
        corp_pattern = r'http://www.wooyun.org/corps/(.*?)">'
        return re.findall(corp_pattern, html)
    
    
    def SubmitTime(html):
        """Extract the submission time field(s) from a bug detail page.

        :param html: HTML text of the page.
        :return: list with the raw text between the submit-time label and
            the following ``</h3>``; empty if the label is absent.
        """
        return re.compile(r"提交时间:(.*?)</h3>").findall(html)
    
    
    def OpenTime(html):
        """Extract the public-disclosure time field(s) from a bug detail page.

        :param html: HTML text of the page.
        :return: list with the raw text between the disclosure-time label
            and the following ``</h3>``; empty if the label is absent.
        """
        open_time_pattern = re.compile(r"公开时间:(.*?)</h3>")
        matches = open_time_pattern.findall(html)
        return matches
    
    
    def BugClass(html):
        """Extract the vulnerability type field(s) from a bug detail page.

        :param html: HTML text of the page.
        :return: list with the raw text between the type label and the
            following ``</h3>``; empty if the label is absent.
        """
        return [hit.group(1) for hit in re.finditer(r"漏洞类型:(.*?)</h3>", html)]
    
    
    def level(html):
        """Extract the severity level field(s) from a bug detail page.

        :param html: HTML text of the page.
        :return: list with the raw text between the severity label and the
            following ``</h3>``; empty if the label is absent.
        """
        severity_pattern = r"危害等级:(.*?)</h3>"
        return re.findall(severity_pattern, html)
    
    
    def BugState(html):
        """Return the first vulnerability status on the page, stripped.

        :param html: HTML text of the page.
        :return: the text between the status label and the following
            ``</h3>``, with surrounding whitespace removed.
        :raises IndexError: if the status label is not present.
        """
        states = re.compile(r"漏洞状态:(.*?)</h3>").findall(html)
        first_state = states[0]
        return first_state.strip()
    
    
    def BugSave(html):
        """Extract the bookmark counter(s) from a bug detail page.

        :param html: HTML text of the page.
        :return: list with the text inside ``<a id="collection_num">`` links
            that are followed by the bookmark label; empty if none match.
        """
        bookmark_pattern = re.compile(r'<a id="collection_num">(.*?)</a>人收藏')
        return bookmark_pattern.findall(html)
    
    
    def OkTime(html):
        """Extract the vendor confirmation time field(s) from the page.

        :param html: HTML text of the page.
        :return: list with the raw text between the confirmation-time label
            and the following ``</p>``; empty if the label is absent.
        """
        return re.findall(r"确认时间:(.*?)</p>", html)
    
    def Bugrank(html):
        """Extract the vulnerability rank field(s) from the page.

        :param html: HTML text of the page.
        :return: list with the raw text between the rank label and the
            following ``</p>``; empty if the label is absent.
        """
        rank_pattern = re.compile(r"漏洞Rank:(.*?)</p>")
        return rank_pattern.findall(html)
    
    def BugMark(html):
        """Extract the tags field(s) from a bug detail page.

        :param html: HTML text of the page.
        :return: list with the raw text between the tags label and the
            following ``</h3>``; empty if the label is absent.
        """
        return [tag.group(1) for tag in re.finditer(r"Tags标签:(.*?)</h3>", html)]
    
    def ignoreTime(html):
        """Extract the "ignored at" time field(s) from a bug detail page.

        The result is always a list so that callers can index it uniformly.
        Previously the fallback was a bare string, so indexing it with
        ``[0]`` produced the single character ``'1'`` instead of the
        placeholder timestamp.

        :param html: HTML text of the page.
        :return: list with the raw text between the ignore-time label and
            the following ``</p>``; ``['1900-01-01 00:00:00']`` when the
            label is absent.
        """
        ignored_times = re.findall(r"忽略时间:(.*?)</p>", html)
        if not ignored_times:
            # Placeholder timestamp for pages that carry no ignore time.
            return ['1900-01-01 00:00:00']
        return ignored_times
    
    def Bugeye(html):
        """Return the text of the ``attention_num`` span, stripped.

        The capture is non-greedy so that it stops at the span's own
        ``</span>``; a greedy capture would run to the last ``</span>`` on
        the line, which on a page collapsed to one line is the last one in
        the document.

        :param html: HTML text of the page.
        :return: content of the first ``<span id="attention_num">`` element
            with surrounding whitespace removed.
        :raises IndexError: if the page has no such span.
        """
        attention_pattern = re.compile(r'<span id="attention_num">(.*?)</span>')
        counts = attention_pattern.findall(html)
        return counts[0].strip()
    
    
    # Scrape every bug page in the id range from the local mirror, print the
    # extracted fields and store them as one row per page in ``alldata``.
    conn = MySQLdb.connect(
        host='192.168.1.1',
        port=3306,
        user='root',
        passwd='root',
        db='wooyunTongji',
        charset='utf8'
    )

    # Running count of stored records; it is also written as the row id.
    mark = 0

    # Parameterised insert: the driver quotes the scraped values.
    insert_sql = (
        'insert into alldata '
        '(id,title,BugNum,jiafang,oper,submittime,opentime,bugclass,'
        'level,bugrank,bugstate,oktime,okstyle,bugmark) '
        'values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
    )

    try:
        for i in range(53022, 89250, 1):
            Url = 'http://192.168.1.106/wooyun/select.php?id=' + str(i)
            # The former second fetch through ``getHtmleye`` was dropped: no
            # such function is defined in the visible source and its result
            # was never used, so it only raised on every iteration.
            try:
                Html = getHtml(Url)
            except Exception:
                # Skip this id; without the ``continue`` the loop would go on
                # with the previous page's HTML (or an undefined name on the
                # first iteration). ``Exception`` keeps Ctrl-C working.
                print('error')
                continue

            # Very short responses are treated as "no such record".
            if len(Html) <= 100:
                continue

            # A page carries either a confirmation time or an ignore time.
            confirm_times = OkTime(Html)
            if not confirm_times:
                whotime = ignoreTime(Html)[0].strip()
                whostyle = '忽略'
            else:
                whotime = confirm_times[0].strip()
                whostyle = '确认'

            ranks = Bugrank(Html)
            BugrankFal = ranks[0] if ranks else '0'

            # Extract each field once and reuse it for both the console
            # output and the insert.
            fields = (
                GetTitle(Html)[0].strip(),
                BugNum(Html)[0].strip(),
                unquote(JiaFang(Html)[0].strip()),
                unquote(getoper(Html)),
                SubmitTime(Html)[0].strip(),
                OpenTime(Html)[0].strip(),
                BugClass(Html)[0].strip(),
                level(Html)[0],
                BugrankFal,
                BugState(Html),
                whotime,
                whostyle,
                BugMark(Html)[0].strip(),
            )
            # Space-separated, matching what a comma-separated print emits.
            print(' '.join(fields))

            mark += 1
            cur = conn.cursor()
            try:
                cur.execute(insert_sql, (mark,) + fields)
            finally:
                cur.close()
            conn.commit()
            print(mark)
    finally:
        # Close the connection even when a page fails to parse.
        conn.close()


    print('Over!')
    

      

  • 相关阅读:
    System.IO.StreamWriter
    C# XML
    C# 泛型
    ACM将一个十进制的数转换为一个十六进制的数输出
    ACM打扫花园问题
    ACM用N个正方体来建造金字塔问可以建造多少层
    输入一串数字找出其中缺少的最小的两个数
    高效算法——B 抄书 copying books,uva714
    暴力求解——POJ 3134Power Calculus
    暴力求解——打表,暴力
  • 原文地址:https://www.cnblogs.com/crac/p/5748308.html
Copyright © 2011-2022 走看看