zoukankan      html  css  js  c++  java
  • 批量文本读取URL获取正常访问且保留对应IP

    #coding=utf-8 
    import sys 
    import requests 
    # Probe the sequentially numbered pages 201506023000.html .. 201506024998.html
    # on the target host and dump every page that answers with HTTP 200.
    for i in range(3000, 4999, 1):
        url = 'http://192.168.88.139:8888/20150602' + str(i) + '.html'
        try:
            # A timeout keeps one unresponsive request from stalling the scan.
            r = requests.get(url, timeout=5)
        except requests.RequestException:
            # Connection error or timeout for this page: skip it and go on.
            continue
        if r.status_code == 200:
            print(url)
            print(r.content)
    
     

     

    原文

    http://zone.wooyun.org/content/20885

    多线程+文本逐行读取+URL的IP转换+写入

    # -*-coding:utf-8-*-
    import os
    import sys
    import Queue
    import getopt
    import logging
    import requests
    import threading
    import time
    import socket
    # Print the wall-clock start time of the run.
    print("start:" + time.strftime("%H:%M:%S"))

    # Root logger: WARNING and above, each record prefixed with its timestamp.
    logging.basicConfig(format="[%(asctime)s] %(message)s", level=logging.WARNING)
    
    class BatchThreads(threading.Thread):
        """Worker thread that probes hosts taken from a shared queue.

        Each queue item is a host name without a scheme.  The worker requests
        ``http://<host>`` and, when the answer is HTTP 200, appends the URL and
        the host's resolved IP address to ``yes.txt``.  If the request, the
        DNS lookup or the write to ``yes.txt`` raises, the URL is appended to
        ``noaccess.txt`` instead.  Responses with any other status code are
        not recorded anywhere.
        """

        def __init__(self, queue):
            """Store the shared queue this worker drains.

            :param queue: queue object holding host-name strings.
            """
            super(BatchThreads, self).__init__()
            self.queue = queue

        def run(self):
            """Process hosts until the queue is exhausted, then return."""
            while True:
                try:
                    # Non-blocking fetch: testing empty() and then calling a
                    # blocking get() lets another worker take the last item
                    # in between, leaving this thread waiting forever.
                    tempurl = self.queue.get_nowait()
                except Queue.Empty:
                    break

                # Built before the try block so the error handler below can
                # always refer to it.
                url = 'http://' + tempurl
                try:
                    r = requests.get(url, timeout=5)
                    if r.status_code == 200:
                        print(url + ' ' + 'access-comman:200')
                        ip = socket.gethostbyname(tempurl)
                        # The record is emitted with a single write() call;
                        # `with` closes the file even if the write fails.
                        with open('yes.txt', 'a') as yes:
                            yes.write(url + '    ' + '    ' + ip + '\n')
                except Exception:
                    # Best-effort scan: any failure for this host is logged
                    # and the worker moves on.  Unlike a bare `except:`, this
                    # does not swallow KeyboardInterrupt or SystemExit.
                    print(url + " error")
                    with open('noaccess.txt', 'a') as noaccess:
                        noaccess.write(url + '\n')
    
    
    def batch_queue(_queue, _thread_number, url_file='url-hz.txt'):
        """Load hosts from a text file, queue them and probe them with workers.

        Lines are stripped; blank lines and lines starting with ``#`` are
        ignored and duplicate entries are collapsed.  The call returns once
        every worker it started has finished.  Nothing is queued or started
        when the file yields no usable entry.

        :param _queue: queue that receives the hosts and is shared by the
            workers.
        :param _thread_number: requested number of worker threads.
        :param url_file: path of the host list, one entry per line.
        """
        with open(url_file) as f:
            stripped = [line.strip() for line in f]
        urls = set(url for url in stripped if url and not url.startswith("#"))
        if not urls:
            return

        for url in urls:
            # Fill the queue that was passed in, not the module-level one.
            _queue.put(url)

        # Rule kept as written: when the requested count exceeds half the
        # number of queued hosts, one worker per queued host is used instead.
        if _thread_number > (_queue.qsize() / 2):
            _thread_number = _queue.qsize()

        workers = [BatchThreads(_queue) for _ in range(_thread_number)]
        # Still recorded in the module-level list, as before.
        threads.extend(workers)

        # Start and join only the workers created by this call, so a repeated
        # call never tries to restart threads that already ran.
        for t in workers:
            t.start()
        for t in workers:
            t.join()
    
    # Module-level state: requested worker count, shared host queue and the
    # list that collects the worker threads.
    thread_number = 20
    queue = Queue.Queue()
    threads = []

    # Run the whole batch, then print the wall-clock finish time.
    batch_queue(queue, thread_number)

    print("end:" + time.strftime("%H:%M:%S"))
  • 相关阅读:
    【二分+字符串hs】[POI2000] 公共串
    【字符串匹配】【BKDRhash||KMP】
    【LCA】P4281 [AHOI2008]紧急集合 / 聚会
    【LCA专题】各种LCA求法
    【差分约束】POJ3159/LG P1993 小K的农场
    【差分约束】POJ1364/LG UVA515 king
    【差分约束】POJ1201/LG SP116 Intervals
    【差分约束】POJ3159 Candies
    【树形结构】LG P2052 [NOI2011]道路修建
    【拓扑排序+概率】LG P4316绿豆蛙的归宿
  • 原文地址:https://www.cnblogs.com/shellr00t/p/4629221.html
Copyright © 2011-2022 走看看