zoukankan      html  css  js  c++  java
  • 手动修改key 伪修改内存变量

    # -*- coding: UTF-8 -*-
    import math
    import random
    import sys
    import threading
    import time
    from time import ctime, sleep
    import requests
    import xlrd
    
    # City codes whose districts are crawled.
    target_citycode_list = ['010', '021', '020', '0755']

    # target_citycode_list = ['0755']
    # Nested lookup: citycode -> adcode -> {'name': ..., 'adcode': ...}.
    adcode_dic = {}
    FEXCEL = '高德地图API_城市编码对照表.xlsx'
    data = xlrd.open_workbook(FEXCEL)
    # The second worksheet holds the rows read below.
    table = data.sheets()[1]
    nrows = table.nrows
    ncols = table.ncols
    for i in range(nrows):
        l = table.row_values(i)
        name_, adcode, citycode = l[0], l[1], l[2]
        # Keep only rows that belong to one of the target cities.
        if citycode not in target_citycode_list:
            continue
        adcode_dic.setdefault(citycode, {})[adcode] = {'name': name_, 'adcode': adcode}

    # Flat list of every adcode to query, in the order the cities and their
    # districts were collected above.
    REQUEST_LIST = [
        entry['adcode']
        for by_adcode in adcode_dic.values()
        for entry in by_adcode.values()
    ]
    REQUEST_LEN = len(REQUEST_LIST)
    EACH_THREAD_REQUEST_NUM = 1

    MAX_PAGINATION = 100
    QPS = 50
    QPS_TIME_UNIT = 1
    # http://lbs.amap.com/api/webservice/guide/tools/info
    INFOCODE_OK = '10000'
    KEY_POOL_LIST = []
    touse_key = ''
    
    
    def dynamic_write_pool_file():
        """Reload the API-key pool from ``key_pool.pool`` into ``KEY_POOL_LIST``.

        Every line of the pool file is split on tabs; the key is the first
        whitespace-delimited token of the second field.  Lines that do not
        have that layout are reported and skipped.

        After loading, the new pool is compared with the previously loaded
        one and a timestamped message says whether it changed.

        Raises:
            OSError: if the pool file cannot be opened.
        """
        global KEY_POOL_LIST
        file_name_key_pool = 'key_pool.pool'
        keypoollist_old = KEY_POOL_LIST
        fresh_keys = []
        # The context manager guarantees the file handle is released.
        with open(file_name_key_pool, 'r', encoding='utf-8') as f:
            for line_no, line in enumerate(f, 1):
                try:
                    fresh_keys.append(line.split('\t')[1].split()[0])
                except IndexError as exc:
                    # Report the concrete problem instead of the exception class.
                    print('key_pool.pool line %s skipped: %r' % (line_no, exc))
        KEY_POOL_LIST = fresh_keys
        # Compare the lists themselves: list.reverse() returns None, so
        # comparing its results would always report "unchanged" (and would
        # reverse the freshly loaded pool in place as a side effect).
        if keypoollist_old == KEY_POOL_LIST:
            print(time.time(), '-old')
        else:
            print(time.time(), '66POOL-new')
    
    
    # Load the key pool once at import time so KEY_POOL_LIST is filled before
    # any key has to be chosen.
    dynamic_write_pool_file()

    # Amap place text-search endpoint used for all POI queries.
    URL_TYPE = 'http://restapi.amap.com/v3/place/text'
    touse_key = ''
    # keywords = '&keywords='
    # Page size requested from the API; also used to derive the page count.
    OFFSET_NUM = 24
    OFFSET = '&offset=%s' % (OFFSET_NUM)
    CITYLIMIT = '&citylimit=true'
    # NOTE(review): the Amap parameter appears to be spelled `extensions`;
    # the value is left untouched here -- confirm before appending it to a URL.
    EXTENTION = '&extention=all'

    # POI type codes (code / top category / mid category / sub category):
    # 120000    Commercial & residential    related                  related
    # 120100    Commercial & residential    Industrial park          Industrial park
    # 120200    Commercial & residential    Buildings                Building-related
    # 120201    Commercial & residential    Buildings                Office building
    # 120202    Commercial & residential    Buildings                Industrial building
    # 120203    Commercial & residential    Buildings                Mixed commercial/residential building
    # 120300    Commercial & residential    Residential area         Residential area
    # 120301    Commercial & residential    Residential area         Villa
    # 120302    Commercial & residential    Residential area         Residential community
    # 120303    Commercial & residential    Residential area         Dormitory
    # 120304    Commercial & residential    Residential area         Community center

    # Amap returns at most 1000 records per query, so each type code is
    # requested separately, at the finest granularity.
    POI_TYPES_LIST = ['120000', '120100', '120200', '120201', '120202', '120203', '120300', '120301', '120302', '120303',
                      '120304']
    # POI_TYPES = '&types=120000|120100|120200|120201|120202|120203|120300|120301|120302|120303|120304'

    # Probe URL used to validate a key.  The page-size parameter is spelled
    # in lowercase (`offset`) like in OFFSET above, so the probe asks for a
    # single record instead of a default-sized page.
    URL_FOR_CHANGE_KEY = 'http://restapi.amap.com/v3/place/text?key=%s&types=060100&city=010&offset=1'
    # Number of probe requests issued so far (drives the QPS throttle).
    change_key_qps = 0
    
    
    def change_key():
        """Pick a working API key from the pool and store it in ``touse_key``.

        The pool file is re-read first, so edits to it take effect without a
        restart.  Candidates are tried from a random starting position,
        wrapping around the pool; the key currently in use is skipped when
        other keys exist.  Each candidate is checked with one probe request,
        and the first whose response carries ``INFOCODE_OK`` becomes the
        active key.

        Raises:
            SystemExit: with ``'NOInvalidKEY'`` when the pool is empty or no
                candidate passes the probe.
        """
        global touse_key, change_key_qps

        dynamic_write_pool_file()
        # Amap does not follow its own QPS / daily-quota policy, so its
        # return codes alone cannot be used to schedule key usage.
        pool_num = len(KEY_POOL_LIST)
        if pool_num == 0:
            sys.exit('NOInvalidKEY')

        # randrange excludes the upper bound, so the start index is always
        # valid; rotating the pool lets every key be tried exactly once.
        start = random.randrange(pool_num)
        rotated = KEY_POOL_LIST[start:] + KEY_POOL_LIST[:start]
        others = [key for key in rotated if key != touse_key]
        # Fall back to re-checking the current key when it is the only one.
        candidates = others or rotated

        for key in candidates:
            url = URL_FOR_CHANGE_KEY % (key)
            change_key_qps += 1
            if change_key_qps % QPS == 0:
                sleep(QPS_TIME_UNIT)
            try:
                # A timeout keeps a stalled connection from blocking forever.
                json_ = requests.get(url, timeout=10).json()
            except (requests.RequestException, ValueError) as exc:
                print('requests.get(url)', exc)
                continue
            if json_.get('infocode') == INFOCODE_OK:
                # Only publish the key once it has been validated.
                touse_key = key
                return
        # One full pass without a usable key: stop instead of recursing
        # without bound.
        sys.exit('NOInvalidKEY')
    
    
    # Number of search requests issued so far; supply_dic uses it for QPS
    # throttling and to trigger the first key selection.
    requests_counter = 0
    # Requests still to be completed.  This is the same list object as
    # REQUEST_LIST (no copy is made), so in-place changes made through either
    # name are visible through the other.
    todo_list = REQUEST_LIST

    # Collected results, shaped as {adcode: [[poi, ...], [poi, ...]]}.
    tosupply_dic = {}
    
    
    def _fetch_json(url):
        """Issue one throttled GET request and return the decoded JSON, or None on failure."""
        global requests_counter
        # Pause once every QPS requests to stay under the rate limit.
        if requests_counter % QPS == 0:
            sleep(QPS_TIME_UNIT)
        requests_counter += 1
        try:
            # A timeout keeps a stalled connection from blocking the thread.
            return requests.get(url, timeout=10).json()
        except (requests.RequestException, ValueError) as exc:
            print('requests.get(url)', exc)
            return None


    def supply_dic(request):
        """Fetch all POIs of every type in ``POI_TYPES_LIST`` for one adcode.

        Pages are requested one by one for each type code.  The result is
        stored in ``tosupply_dic[request]`` as a list of per-page POI lists,
        and ``request`` is removed from ``todo_list``, only when every page of
        every type was fetched successfully.  On any failure ``request`` is
        left in (or added to) ``todo_list`` so it can be retried, and nothing
        partial is stored; this also keeps retries from duplicating data.

        Args:
            request: the adcode used as the ``city`` parameter of the query.
        """
        if requests_counter == 0:
            change_key()

        collected = []
        for poi_type in POI_TYPES_LIST:
            page = 1
            page_count = 1
            while page <= page_count:
                # The URL is rebuilt for every request so that a key changed
                # in the meantime is picked up.  The filter parameter is
                # `types`, matching the probe URL used for key validation.
                url = '%s?key=%s&city=%s&types=%s%s%s&page=%s' % (
                    URL_TYPE, touse_key, request, poi_type, OFFSET, CITYLIMIT, page)
                print(url)
                r_json = _fetch_json(url)
                if r_json is None:
                    # Network or decoding failure: retry the request later.
                    if request not in todo_list:
                        todo_list.append(request)
                    return
                if r_json.get('infocode') != INFOCODE_OK:
                    # The API rejected the call: retry later with another key.
                    if request not in todo_list:
                        todo_list.append(request)
                    change_key()
                    return
                if page == 1:
                    # The first page reports the total, which fixes how many
                    # pages exist; zero results is a valid, complete answer.
                    page_count = math.ceil(int(r_json.get('count', 0)) / OFFSET_NUM)
                pois_list = r_json.get('pois') or []
                if pois_list:
                    collected.append(pois_list)
                page += 1

        tosupply_dic[request] = collected
        if request in todo_list:
            todo_list.remove(request)
    
    
    # Retry threshold: deal_exception_list keeps retrying while todo_list
    # holds more than this many entries.
    MAX_EXCEPTION_URL_NUM = 0
    
    
    def deal_exception_list(max_rounds=None):
        """Retry the requests still pending in ``todo_list``.

        Each round prints the pending list and, while it holds more than
        ``MAX_EXCEPTION_URL_NUM`` entries, calls ``supply_dic`` for every
        pending request.

        Args:
            max_rounds: optional upper bound on the number of retry rounds.
                ``None`` (the default) keeps retrying until the pending list
                is short enough.
        """
        rounds = 0
        # A loop replaces the former self-recursion, so a long run of failing
        # rounds cannot exhaust the interpreter's recursion limit.
        while True:
            print(todo_list)
            if len(todo_list) <= MAX_EXCEPTION_URL_NUM:
                return
            if max_rounds is not None and rounds >= max_rounds:
                return
            # Iterate over a snapshot: supply_dic adds to and removes from
            # todo_list while it runs, which would otherwise skip entries or
            # index past the end.
            for adcode in list(todo_list):
                supply_dic(adcode)
            rounds += 1
    
    
    class MyThread(threading.Thread):
        """Thread that calls ``func`` with ``args`` as its single argument."""

        def __init__(self, func, args, name=''):
            super().__init__()
            # The name is assigned after base initialisation so that an empty
            # string is kept as given.
            self.name, self.func, self.args = name, func, args

        def run(self):
            # ``args`` is handed over as one positional value, not unpacked.
            target, payload = self.func, self.args
            target(payload)
    
    
    def main():
        """Crawl every requested adcode in parallel and write the POIs to a CSV file.

        One thread per adcode runs ``supply_dic``; after all threads have
        finished, ``deal_exception_list`` retries what is still pending, and
        the collected POIs in ``tosupply_dic`` are written to
        ``GEN_GD_business_building.csv``.
        """
        # Imported locally so this function brings its own dependency.
        import csv

        print('starting at:', ctime())
        thread_sum = math.ceil(REQUEST_LEN / EACH_THREAD_REQUEST_NUM)
        threads_list = []
        # Work on a snapshot so that changes made to the request list by the
        # worker threads cannot affect which adcodes are scheduled.
        for adcode in list(REQUEST_LIST)[:thread_sum]:
            print(184, adcode)
            # MyThread.run passes ``args`` as one positional value, so the
            # adcode itself is handed over rather than a tuple.
            threads_list.append(MyThread(supply_dic, adcode, supply_dic.__name__))
        # The main process exits only after all non-daemon threads have exited.
        for t in threads_list:
            # Set the attribute; assigning to ``setDaemon`` would merely
            # overwrite the method without changing the flag.
            t.daemon = False
            t.start()
        # Wait for all threads to finish.
        for t in threads_list:
            t.join()
        deal_exception_list()

        FGEN = 'GEN_GD_business_building.csv'
        columns = (
            'id', 'name', 'type', 'typecode', 'biz_type', 'address', 'location',
            'tel', 'distance', 'biz_ext', 'pname', 'cityname', 'adname',
            'shopid', 'shopinfo', 'poiweight',
        )
        location_index = columns.index('location')
        # A single ``with`` block writes header and rows and always closes the
        # file; the csv writer quotes values that contain commas, which a
        # hand-built comma-joined line would not.
        with open(FGEN, 'w', encoding='utf-8-sig', newline='') as fo:
            writer = csv.writer(fo, lineterminator='\n')
            writer.writerow(columns)
            for pages in tosupply_dic.values():
                for pois in pages:
                    for dic_ in pois:
                        # Missing fields become empty cells instead of
                        # aborting the export.
                        row = [dic_.get(column, '') for column in columns]
                        # Keep the "lng lat" form of the location column.
                        row[location_index] = str(row[location_index]).replace(',', ' ')
                        writer.writerow(row)
    
    
    # Start the crawl only when the file is executed as a script.
    if __name__ == '__main__':
        main()
  • 相关阅读:
    vue项目引用less报错
    vue dev配置代理会报404
    为什么需要用到消息队列
    理解kafka消费者
    WebSocket和long poll、ajax轮询的区别
    数据库开发——MySQL——慢查询优化
    数据库开发——MySQL——索引原理
    数据库开发——MySQL——函数与流程控制
    数据库开发——MySQL——内置功能
    数据库开发——MySQL——pymysql模块
  • 原文地址:https://www.cnblogs.com/rsapaper/p/7265042.html
Copyright © 2011-2022 走看看