  • Crawling every WooYun vulnerability awarded RMB or cloud symbols (Python script)

    import httplib
    import re
    from bs4 import BeautifulSoup
    from time import sleep  # only needed if the throttle in dealthepage is enabled

    '''
    usage:

    paste your WooYun cookies below, then just go crawl!

    author: Elliott
    '''


    domain = 'wooyun.org'
    cookies = ''  # !!!! paste your WooYun cookies here
    user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20100101 Firefox/29.0'


    def countnumber():  # get the total number of listing pages
        global domain
        global cookies
        global user_agent
        conn = httplib.HTTPConnection(domain)
        conn.request('GET', '/user.php?action=openbugs', '', {'Cookie': cookies, 'User-Agent': user_agent, 'Referer': 'http://wooyun.org/index.php', 'Host': 'wooyun.org'})
        content = conn.getresponse()
        content = content.read()
        soup = BeautifulSoup(content, 'html.parser')
        tag = soup.find_all('p', attrs={'class': 'page'})
        if len(tag) == 0:
            tag = 'None'
        else:
            tag = str(tag[0])
        pattern = re.compile('>.*<a class="current">')
        result = pattern.findall(tag)
        if len(result) == 0:
            result = 'None'
        else:
            result = str(result[0])
        number = filter(str.isdigit, result)  # Python 2: filtering a str yields a str of digits
        num = number[5:]  # skip the leading digits; the remainder is the total page number
        return int(num)


    def dealthepage(content):
        global domain
        global cookies
        global user_agent
        conn = httplib.HTTPConnection(domain)
        soup = BeautifulSoup(content, 'html.parser')
        k = soup.find_all('a')
        item = k[27:47]  # the 20 <a> tags on a listing page that point at bug reports
        pattern = re.compile('href="(.+?)"')
        hrefs = []
        for i in range(len(item)):
            ss = pattern.findall(str(item[i]))
            if len(ss) == 0:
                break
            hrefs.append(str(ss[0]))
        for i in hrefs:
            #sleep(0.5)  # optional throttle between requests
            conn.request('GET', i, '', {'Cookie': cookies, 'User-Agent': user_agent, 'Referer': 'http://wooyun.org/index.php', 'Host': 'wooyun.org'})
            content2 = conn.getresponse()
            content2 = content2.read()
            soup2 = BeautifulSoup(content2, 'html.parser')
            imgtag = soup2.find_all(class_='credit')
            # the credit block may hold money ($) icons, cloud icons, or both
            if len(imgtag) != 0:
                findcloud = re.compile('src="/images/credit.png"')
                findmoney = re.compile('src="/images/m(.?).png"')
                cloudnum = findcloud.findall(content2)
                moneylevel = findmoney.findall(content2)
                cloud = 0
                money = 0
                if 1 <= len(cloudnum) <= 3:  # one icon per cloud, three at most
                    cloud = len(cloudnum)
                if 1 <= len(moneylevel) <= 3:
                    money = len(moneylevel)
                deltag = re.compile(r'<[^>]+>')  # strips HTML tags from the scraped fields
                title = soup2.findAll(attrs={"class": "wybug_title"})
                if len(title) == 0:
                    title = 'No Title'
                else:
                    title = str(title[0])
                title = deltag.sub('', title)
                author = soup2.findAll(attrs={"class": "wybug_author"})
                if len(author) == 0:
                    author = 'No name'
                else:
                    author = str(author[0])
                author = deltag.sub('', author)
                date = soup2.findAll(attrs={"class": "wybug_date"})
                if len(date) == 0:
                    date = 'No time'
                else:
                    date = str(date[0])
                date = deltag.sub('', date)
                link = str("http://www.wooyun.org" + i)
                f = open("test.html", "a+")
                s = "<tr><td>level:cloud=" + str(cloud) + "money=" + str(money) + "</td><th>" + date + "</th><td><a href='" + link + "'>" + title + "</a></td><th>" + author + "</th></tr><br>"
                f.write(s)
                f.close()


    if __name__ == '__main__':
        num = countnumber()  # total number of listing pages
        for i in range(num):
            conn = httplib.HTTPConnection(domain)
            conn.request('GET', '/user.php?action=openbugs&pNO=' + str(i + 1), '', {'Cookie': cookies, 'User-Agent': user_agent, 'Referer': 'http://wooyun.org/index.php', 'Host': 'wooyun.org'})
            content = conn.getresponse()
            content = content.read()
            dealthepage(content)
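    Each qualifying report is appended to test.html as one table row by the f.write(s) call above. With made-up values for the date, link, title, and author (the real values come from the scraped report page), a row looks roughly like:

        <tr><td>level:cloud=2money=0</td><th>2015-11-24</th><td><a href='http://www.wooyun.org/bugs/...'>example title</a></td><th>example author</th></tr><br>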

    Result screenshot:
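    The script is Python 2 throughout (httplib, filter on a str). Below is a minimal Python 3 sketch of the per-report scoring step, under the assumption that the endpoints, cookie requirement, and CSS class names are exactly the ones the original script uses; requests replaces httplib, and score_report is just an illustrative name:

        import re
        import requests
        from bs4 import BeautifulSoup

        DOMAIN = 'http://wooyun.org'
        HEADERS = {
            'Cookie': '',  # paste your WooYun cookies, as in the original script
            'User-Agent': 'Mozilla/5.0',
            'Referer': 'http://wooyun.org/index.php',
        }

        def score_report(path):
            """Fetch one bug report and count its cloud and money icons."""
            html = requests.get(DOMAIN + path, headers=HEADERS).text
            soup = BeautifulSoup(html, 'html.parser')
            if not soup.find_all(class_='credit'):
                return None  # the report shows neither symbol
            cloud = len(re.findall(r'src="/images/credit\.png"', html))
            money = len(re.findall(r'src="/images/m.?\.png"', html))
            title = soup.find(class_='wybug_title')
            return {
                'cloud': min(cloud, 3),  # at most three icons of each kind
                'money': min(money, 3),
                'title': title.get_text(strip=True) if title else 'No Title',
                'link': DOMAIN + path,
            }

    Pagination would mirror the __main__ loop above: request /user.php?action=openbugs&pNO=1..N and feed each report link to score_report.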

  • Original post: https://www.cnblogs.com/elliottc/p/4992700.html