zoukankan      html  css  js  c++  java
  • Python抓取淘宝IP地址数据

    def fetch(ip):
        url = 'http://ip.taobao.com/service/getIpInfo.php?ip=' + ip
        result = []
        try:
            response = urllib.urlopen(url).read()
            jsondata = json.loads(response)
            if jsondata[u'code'] == 0:
                result.append(jsondata[u'data'][u'ip'].encode('utf-8'))            
                result.append(jsondata[u'data'][u'country'].encode('utf-8'))
                result.append(jsondata[u'data'][u'country_id'].encode('utf-8'))
                result.append(jsondata[u'data'][u'area'].encode('utf-8'))
                result.append(jsondata[u'data'][u'area_id'].encode('utf-8'))
                result.append(jsondata[u'data'][u'region'].encode('utf-8'))
                result.append(jsondata[u'data'][u'region_id'].encode('utf-8'))
                result.append(jsondata[u'data'][u'city'].encode('utf-8'))
                result.append(jsondata[u'data'][u'city_id'].encode('utf-8'))
                result.append(jsondata[u'data'][u'county'].encode('utf-8'))
                result.append(jsondata[u'data'][u'county_id'].encode('utf-8'))
                result.append(jsondata[u'data'][u'isp'].encode('utf-8'))
                result.append(jsondata[u'data'][u'isp_id'].encode('utf-8'))            
            else:
                return 0, result
        except:
            logging.exception("Url open failed:" + url)
            return 0, result
        return 1, result
     
    def worker(ratelimit, jobs, results, progress):
        global cancel
        while not cancel:
            try:
                ratelimit.ratecontrol()
                ip = jobs.get(timeout=2) # Wait 2 seconds
                ok, result = fetch(ip)
                if not ok:
                    logging.error("Fetch information failed, ip:{}".format(ip))
                    progress.put("") # Notify the progress even it failed
                elif result is not None:
                    results.put(" ".join(result))
                jobs.task_done()    # Notify one item
            except Queue.Empty:
                pass
            except:
                logging.exception("Unknown Error!")
    def process(target, results, progress):
        global cancel
        while not cancel:
            try:
                line = results.get(timeout=5)
            except Queue.Empty:
                pass
            else:
                print >>target, line
                progress.put("")
                results.task_done()
    def progproc(progressbar, count, progress):
        """
        Since ProgressBar is not a thread-safe class, we use a Queue to do the counting job, like
        two other threads. Use this thread do the printing of progress bar. By the way, it will
        print to stderr, which does not conflict with the default result output(stdout).
        """
        idx = 1
        while True:
            try:
                progress.get(timeout=5)
            except Queue.Empty:
                pass
            else:
                progressbar.update(idx)
                idx += 1
  • 相关阅读:
    Tomcat造成404
    ajax缺少@ResponseBody注解前台404,业务可以运行
    几种常见的Runtime Exception
    SQL注入通俗讲解
    MYSQL数据库导入大数据量sql文件失败的解决方案
    css选择器
    http端口
    基础算法之最大子列求和问题
    基础算法之链表逆序
    Prolog&Epilog
  • 原文地址:https://www.cnblogs.com/chenjingyi/p/5794736.html
Copyright © 2011-2022 走看看