zoukankan      html  css  js  c++  java
  • 批量从文本读取URL，筛选可正常访问的地址并保留对应IP

    #coding=utf-8 
    import sys 
    import requests 
    # Probe the sequentially numbered pages 20150602<i>.html for i in
    # 3000..4998 and dump the URL and body of every page that answers 200.
    for i in range(3000, 4999):
        url = 'http://192.168.88.139:8888/20150602' + str(i) + '.html'
        try:
            # A timeout keeps one unresponsive page from stalling the scan.
            r = requests.get(url, timeout=5)
        except requests.RequestException:
            # An unreachable page must not abort the remaining probes.
            continue
        if r.status_code == 200:
            print(url)
            print(r.content)
    
     

     

    原文

    http://zone.wooyun.org/content/20885

    多线程+文本逐行读取+URL的IP转换+写入

    # -*-coding:utf-8-*-
    import os
    import sys
    import Queue
    import getopt
    import logging
    import requests
    import threading
    import time
    import socket
    # Print the wall-clock start time of the run.
    print("start:" + time.strftime("%H:%M:%S"))

    # Root logger: WARNING and above only, each record prefixed with a
    # timestamp.
    logging.basicConfig(
        format="[%(asctime)s] %(message)s",
        level=logging.WARNING,
    )
    
    class BatchThreads(threading.Thread):
        """Worker thread that probes hosts taken from a shared queue.

        Every queue item is a host name without a scheme. The worker requests
        ``http://<host>`` with a 5 second timeout. On HTTP 200 it appends the
        URL and the host's resolved IP address to ``yes.txt``. If the request,
        the DNS lookup or the write raises, the URL is appended to
        ``noaccess.txt``. Responses with any other status code are not
        recorded.
        """

        # Shared by all workers so appended result lines never interleave.
        _write_lock = threading.Lock()

        def __init__(self, queue):
            """Store the shared work queue this worker drains."""
            super(BatchThreads, self).__init__()
            self.queue = queue

        def run(self):
            """Process hosts until the queue is empty, then return."""
            while True:
                try:
                    # Non-blocking get: checking empty() and then calling a
                    # blocking get() can hang forever when another worker
                    # takes the last item between the two calls.
                    tempurl = self.queue.get_nowait()
                except Queue.Empty:
                    break
                self._probe(tempurl)

        def _probe(self, tempurl):
            """Request one host and record the outcome in the result files."""
            url = 'http://' + tempurl
            try:
                r = requests.get(url, timeout=5)
                if r.status_code == 200:
                    print(url + ' ' + 'access-comman:200')
                    ip = socket.gethostbyname(tempurl)
                    self._append('yes.txt', url + '    ' + '    ' + ip + '\n')
            except Exception:
                # Best-effort scan: any failure for this host is recorded and
                # the worker moves on. Unlike a bare except, this does not
                # trap KeyboardInterrupt or SystemExit.
                print(url + " error")
                self._append('noaccess.txt', url + '\n')

        def _append(self, path, text):
            """Append ``text`` to ``path`` in a single locked write."""
            with self._write_lock:
                with open(path, 'a') as handle:
                    handle.write(text)
    
    
    def batch_queue(_queue, _thread_number, _url_file='url-hz.txt'):
        """Load hosts from a text file and probe them with worker threads.

        Lines are stripped; blank lines and lines starting with ``#`` are
        skipped, and duplicate hosts are collapsed. Every remaining host is
        put on ``_queue``, then the workers are started and joined.

        :param _queue: queue the hosts are put on and the workers drain.
        :param _thread_number: maximum number of worker threads to start.
        :param _url_file: path of the host list, one host per line.
        """
        hosts = set()
        with open(_url_file) as f:
            for line in f:
                host = line.strip()
                if host and not host.startswith("#"):
                    hosts.add(host)
        if not hosts:
            return

        # Fill the queue that was passed in, not a module-level global.
        for host in hosts:
            _queue.put(host)

        # Never start more workers than there are queued hosts.
        _thread_number = min(_thread_number, _queue.qsize())

        # A fresh local list per call: Thread objects can be started only
        # once, so reusing a shared list would break a second invocation.
        workers = [BatchThreads(_queue) for _ in range(_thread_number)]
        for t in workers:
            t.start()
        for t in workers:
            t.join()
    
    # Module-level state for the run: worker count, the shared work queue and
    # the list of worker threads.
    thread_number = 20
    queue = Queue.Queue()
    threads = []

    # Blocks until batch_queue returns.
    batch_queue(queue, thread_number)

    # Print the wall-clock finish time of the run.
    print("end:" + time.strftime("%H:%M:%S"))
  • 相关阅读:
    HTTP——Web服务器、代理、缓存
    nginx配置文件详解2
    nginx配置文件详解
    shell笔记2
    django笔记
    python 发请求,urllib,urllib2
    nginx配置
    python os模块学习
    mac 终端命令小结
    mac常用命令笔记
  • 原文地址:https://www.cnblogs.com/shellr00t/p/4629221.html
Copyright © 2011-2022 走看看