zoukankan      html  css  js  c++  java
  • 改进 网站资源探测工具(添加代理)

    对网站资源探测工具进行调整,并且添加代理,防止多次访问ip被封的情况。

    #获取代理,并写入agents列
    def agent_list(url):
        global agent_lists
        agent_lists = []
        header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0'}
        r = requests.get(url,headers = header)
        agent_info = BeautifulSoup(r.content,'html.parser').find(id = "ip_list").find_all('tr')[1:]
        for i in range(len(agent_info)):
            info = agent_info[i].find_all('td')
            agents = {info[5].string : 'http://' + info[1].string}
            agent_lists.append(agents)

    修改后的 网站资源扫描工具:

    #!/usr/bin/env python3
    # -*- coding: utf-8 -*-
    # by 默不知然 2018-03-15
    
    import threading
    from threading import Thread
    from bs4 import BeautifulSoup
    import time
    import sys
    import requests
    import getopt
    import random
    
    #创建类,并对目标网站发起请求
    class scan_thread (threading.Thread):
        global real_url_list
        real_url_list = []
        def __init__(self,url):
            threading.Thread.__init__(self)
            self.url = url
    
        def run(self):
            try:
                proxy = random.sample(agent_lists,1)[0]
                r = requests.get(self.url, proxies = proxy)
                print(self.url,'------->>',str(r.status_code))
                if int(r.status_code) == 200:
                    real_url_list.append(self.url)
                l[0] = l[0] - 1
            except Exception as e:
                print(e)
    
    #获取字典并构造url并声明扫描线程
    def url_makeup(dicts,url,threshold):
        global url_list
        global l
        url_list = []
        l =[]
        l.append(0)
        dic = str(dicts)
        with open (dic,'r') as f:
            code_list = f.readlines()
            for i in code_list:
                url_list.append(url+i.replace('
    ','').replace('
    ',''))
        while len(url_list):
            try:
                if l[0] < threshold:
                    n = url_list.pop(0)
                    l[0] = l[0] + 1
                    thread = scan_thread(n)
                    thread.start()
            except KeyboardInterrupt:
                print('用户停止了程序,完成目录扫描。')
                sys.exit()
    
    #获取输入参数
    def get_args():
        global get_url
        global get_dicts
        global get_threshold
        try:
            options,args = getopt.getopt(sys.argv[1:],"u:f:n:")
        except getopt.GetoptError:
            print("错误参数")
            sys.exit()
        for option,arg in options:
            if option == '-u':
                get_url = arg
            if option == '-f':
                get_dicts = arg
            if option == '-n':
                get_threshold = int(arg)
    #获取代理,并写入agents列
    def agent_list(url):
        global agent_lists
        agent_lists = []
        header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0'}
        r = requests.get(url,headers = header)
        agent_info = BeautifulSoup(r.content,'html.parser').find(id = "ip_list").find_all('tr')[1:]
        for i in range(len(agent_info)):
            info = agent_info[i].find_all('td')
            agents = {info[5].string : 'http://' + info[1].string}
            agent_lists.append(agents)
    
    #主函数,运行扫描程序
    def main():
        agent_url = 'http://www.xicidaili.com/nn/1'
        agent_list(agent_url)
        get_args()
        url = get_url
        dicts = get_dicts
        threshold = get_threshold
        url_makeup(dicts,url,threshold)
        time.sleep(0.5)
        print('目标网站存在目录: ','
    ', real_url_list)
        with open(r'网站目录扫描结果.txt','w') as f:
            for i in real_url_list:
                f.write(i)
                f.write('
    ')
    
    if __name__ == '__main__':
        main()

    对某网站扫描结果:

  • 相关阅读:
    asp.net Ajax调用Aspx后台方法
    JS 通过字符串取得对应对象
    nginx js、css、图片 及 一些静态文件中出现 http://upstreamname:port 导致部分网页样式显示不正常
    jexus手动跨域设置
    HTTP Error 400. The request hostname is invalid
    at MySql.Data.MySqlClient.MySqlStream.ReadPacket 或 FUNCTION account.AddMinutes does not exist
    sql xml 入门
    Jexus .Net at System.Net.Sockets.Socket.Connect (System.Net.IPAddress[] addresses, System.Int32 port)
    关于SQL SERVER中的FLOAT转换为VARCHAR
    JS倒计时
  • 原文地址:https://www.cnblogs.com/kaiho/p/8578848.html
Copyright © 2011-2022 走看看