zoukankan      html  css  js  c++  java
  • ORDER BY today_used ASC' % (MAX_USED_TIMES)

    python D:pyminecleanspider_mapget_bd_uid_rest_b.py
    
    python D:pyminecleanspider_mapget_bd_uid_rest.py
    
    python D:pyminecleanspider_mapget_bd_uid_28_other20_b.py
     
    #MAX_USED_TIMES = 1900
    python D:pyminecleanspider_mapget_bd_uid_28_other20.py
    
    python D:pyminecleanspider_mapget_bd_uid.py
    
    python D:pyminecleanspider_mapget_bd_uid.py
    
    python D:pyminecleanspider_mapget_bd_uid.py
    

      

    import xlrd
    import time
    import sys
    import os
    import requests
    import sqlite3
    import threading
    
    # Resolve everything relative to this script so it can be started from any
    # working directory; the parent directory is also made importable.
    curPath = os.path.abspath(os.path.dirname(__file__))
    rootPath = os.path.split(curPath)[0]
    sys.path.append(rootPath)

    # MAX_USED_TIMES: ceiling compared against the today_used column of a key.
    # overrun_str:    message text kept for reference (unused in this script).
    # DB_KEY_EXHAUST: sentinel returned when no key is under the ceiling.
    MAX_USED_TIMES, overrun_str, DB_KEY_EXHAUST = 1900, '天配额超限,限制访问', 'DB_KEY_EXHAUST'

    # SQLite file that stores the per-key usage counters.
    db = os.path.join(curPath, 'py_bdspider_status.db')

    # Only every third line of the city file is a city name; spaces and the
    # line break are stripped and the suffix '市' is appended.
    pcity_list = []
    pcity_file = os.path.join(curPath, '省会城市.txt')
    with open(pcity_file, 'r', encoding='utf-8') as pf:
        for line_no, line in enumerate(pf, start=1):
            if line_no % 3 == 0:
                pcity_list.append(line.replace(' ', '').replace('\n', '') + '市')
    pcity_sorted_list = sorted(pcity_list)

    target_city_list_big = ['广州市', '厦门市', '深圳市', '北京市', '杭州市', '成都市', '上海市', '西安市']
    # Cities to skip: the big cities plus every city from the file. A copy is
    # taken so that extending the skip list leaves target_city_list_big intact.
    target_city_list_pass = list(target_city_list_big)
    for city_name in pcity_list:
        if city_name not in target_city_list_pass:
            target_city_list_pass.append(city_name)
    
    
    # def db_init_key_table():
    #     conn = sqlite3.connect(db)
    #     c = conn.cursor()
    #     sql = 'DELETE  FROM  baidu_map_key_used'
    #     c.execute(sql)
    #     conn.commit()
    #     pcity_file = '%s\%s' % (curPath, 'bdmap_key.txt')
    #     with open(pcity_file, 'r', encoding='utf-8') as pf:
    #         c_ = 0
    #         for i in pf:
    #             if len(i) < 4:
    #                 continue
    #             author, key = i.replace('\n', '').split('\t')
    #             localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
    #             sql = 'INSERT INTO baidu_map_key_used (author,key,update_time,today_used) VALUES ("%s","%s","%s",%s) ' % (
    #                 author, key, localtime_, 0)
    #             c.execute(sql)
    #     conn.commit()
    #     conn.close()
    
    
    # db_init_key_table()
    # target_city_list = target_city_list[0:11]
    # target_city_list = target_city_list[0:11]
    
    
    
    def db_get_one_effective():
        """Return the least-used API key that is still within its quota.

        Queries ``baidu_map_key_used`` in the database at ``db`` for rows with
        ``today_used <= MAX_USED_TIMES``, ordered by ``today_used`` ascending,
        and takes the first row.

        Returns:
            The key value, or ``DB_KEY_EXHAUST`` when no row qualifies.
        """
        sql = 'SELECT key FROM baidu_map_key_used WHERE today_used<=? ORDER BY today_used ASC'
        conn = sqlite3.connect(db)
        try:
            res = conn.execute(sql, (MAX_USED_TIMES,)).fetchone()
        finally:
            # A connection is opened per call, so it must be closed on every
            # path or one handle leaks per request.
            conn.close()
        return DB_KEY_EXHAUST if res is None else res[0]
    
    
    def db_update_one_today_used(key):
        """Count one more request against ``key`` in ``baidu_map_key_used``.

        Increments ``today_used`` and sets ``update_time`` to the local time
        formatted as ``%y%m%d%H%M%S``.

        Args:
            key: Value of the ``key`` column identifying the row to update.
        """
        localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
        # Bound parameters instead of string formatting: no quoting problems
        # in the key, and the timestamp is passed as text like the value the
        # commented-out table seeding code inserts.
        sql = 'UPDATE baidu_map_key_used SET today_used = today_used+1 ,update_time=? WHERE key=?'
        conn = sqlite3.connect(db)
        try:
            conn.execute(sql, (localtime_, key))
            conn.commit()
        finally:
            conn.close()
    
    
    # Responses are stored under dir_, failed requests under dir_exception.
    dir_, dir_exception = 'baidu_map_uid', 'baidu_map_uid_exception'
    # Names (without '.txt') of the requests that already have a result file.
    requested_file_list = []
    # Both directory strings end with a path separator (the empty last
    # component adds it) because the writers build file names by plain
    # string concatenation.
    requested_file_dir_str = os.path.join(curPath, dir_, '')
    requested_file_dir_exception_str = os.path.join(curPath, dir_exception, '')
    # Directory listing taken once, at import time.
    requested_file_dir = os.listdir(requested_file_dir_str)
    
    
    def chk_if_requested_file():
        """Bring ``requested_file_list`` up to date with the result directory.

        Lists ``requested_file_dir_str`` on every call, so files written after
        start-up are seen, and appends each file name, minus a trailing
        ``.txt``, that is not in the list yet.
        """
        known = set(requested_file_list)  # O(1) membership instead of list scans
        for f in os.listdir(requested_file_dir_str):
            to_in = f[:-len('.txt')] if f.endswith('.txt') else f
            if to_in not in known:
                known.add(to_in)
                requested_file_list.append(to_in)
    
    
    # Fill requested_file_list once at import time, before the workbook is read.
    chk_if_requested_file()
    
    
    def write_requested_res(request_name, str_, type_='.txt'):
        """Store a response body in the result directory and log the success.

        Args:
            request_name: File name stem (directory and suffix are added here).
            str_: Text written to the file, encoded as UTF-8.
            type_: File suffix, ``'.txt'`` by default.
        """
        # '?' is removed from the whole path; some request names end in '?'.
        out_path = '{}{}{}'.format(requested_file_dir_str, request_name, type_).replace('?', '')
        with open(out_path, 'w', encoding='utf-8') as out_file:
            out_file.write(str_)
        print('ok', threading.get_ident(), request_name)
    
    
    def write_requested_exception_res(request_name, str_, type_='.txt'):
        """Store a failure marker in the exception directory.

        Args:
            request_name: File name stem (directory and suffix are added here).
            str_: Text written to the file, encoded as UTF-8.
            type_: File suffix, ``'.txt'`` by default.
        """
        # '?' is removed from the whole path; some request names end in '?'.
        out_path = '{}{}{}'.format(requested_file_dir_exception_str, request_name, type_).replace('?', '')
        with open(out_path, 'w', encoding='utf-8') as out_file:
            out_file.write(str_)
    
    
    # city -> district -> list of request names still to fetch
    # (filled by gen_request_dic_list).
    request_dic = {}
    
    # Cities found in the workbook that are not in the skip list
    # (appended to by gen_request_dic_list).
    target_city_list = []
    
    
    def gen_request_dic_list():
        """Fill ``request_dic`` and ``target_city_list`` from the task workbook.

        Reads the first sheet of the ``.xlsx`` file next to this script and
        skips its first row. Rows whose city is in ``target_city_list_pass``
        are ignored; every other city is recorded in ``target_city_list``.
        Requests whose ``city + district + request_name`` is not yet in
        ``requested_file_list`` are collected, without duplicates and in sheet
        order, as ``request_dic[city][district] -> [request_name, ...]``.

        Raises:
            ValueError: If a row does not have exactly nine columns.
        """
        fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
        FEXCEL = os.path.join(curPath, fname_source + '.xlsx')
        # NOTE(review): xlrd 2.x no longer opens .xlsx files - confirm the
        # installed xlrd version before running.
        table = xlrd.open_workbook(FEXCEL).sheets()[0]
        # Snapshots as sets: one hash lookup per row instead of a list scan.
        skip_cities = set(target_city_list_pass)
        already_requested = set(requested_file_list)
        for row_idx in range(1, table.nrows):
            dbid, area_code, name_, request_name, type_, city, district, addr, street = table.row_values(row_idx)
            if city in skip_cities:
                continue
            if city not in target_city_list:
                target_city_list.append(city)
            request_name_chk = '%s%s%s' % (city, district, request_name)
            if request_name_chk in already_requested:
                continue
            names = request_dic.setdefault(city, {}).setdefault(district, [])
            if request_name not in names:
                names.append(request_name)
    
    
    # Build the work list at import time.
    gen_request_dic_list()
    
    # Same workbook base name as the one used inside gen_request_dic_list.
    fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
    
    # URL template for the Baidu place-suggestion endpoint; fun_ substitutes the
    # R-QUERY, R-CITY and R-AK placeholders. Example of a filled-in request:
    # .../place/v2/suggestion?query=<name>&region=<city>&city_limit=true&output=json&ak=<key>
    # (the API key that used to be spelled out here has been redacted).
    base_url = 'http://api.map.baidu.com/place/v2/suggestion?query=R-QUERY&region=R-CITY&city_limit=true&output=json&ak=R-AK'
    
    
    def fun_(city):
        """Request and store the suggestion result for every pending name of ``city``.

        For each entry of ``request_dic[city]`` the requested-file list is
        refreshed, a key is taken from the database, the URL is built from
        ``base_url`` and the response text is written to the result directory.
        Processing of the city stops as soon as no usable key is left.

        Args:
            city: A key of ``request_dic``.
        """
        from urllib.parse import quote  # local import keeps the block self-contained

        for district in request_dic[city]:
            for request_name in request_dic[city][district]:
                request_name_chk = '%s%s%s' % (city, district, request_name)
                chk_if_requested_file()
                if request_name_chk in requested_file_list:
                    continue
                ak = db_get_one_effective()
                if ak == DB_KEY_EXHAUST:
                    print(DB_KEY_EXHAUST)
                    # Leave the whole function: a bare `break` would only end
                    # this district and query the database again for the next.
                    return
                # Percent-encode the values so '&', '#', '?' or spaces inside a
                # name cannot alter the query string.
                url_ = (base_url
                        .replace('R-QUERY', quote(str(request_name), safe=''))
                        .replace('R-CITY', quote(str(city), safe=''))
                        .replace('R-AK', ak))
                try:
                    # Without a timeout a stalled connection blocks the thread forever.
                    bd_res_json_str = requests.get(url_, timeout=30).text
                    db_update_one_today_used(ak)
                    write_requested_res(request_name_chk, bd_res_json_str)
                except Exception as exc:
                    # Deliberate best effort: record the failure and move on to
                    # the next request instead of ending the thread.
                    bd_res_json_str = '请求百度-异常'
                    write_requested_exception_res(request_name_chk, bd_res_json_str)
                    print(bd_res_json_str, repr(exc))
    
    
    class MyThread(threading.Thread):
        """Thread that calls ``func`` with ``args`` as its single argument."""

        def __init__(self, func, args):
            super().__init__()
            self.func = func
            self.args = args

        def run(self):
            """Invoke the stored callable with the stored argument."""
            callback, argument = self.func, self.args
            callback(argument)
    
    
    # Number of target cities, captured once at import time.
    thread_sum = len(target_city_list)
    
    
    def main():
        """Run ``fun_`` in one thread per target city and wait for all of them."""
        threads_list = []
        for city in target_city_list:
            # fun_ indexes request_dic[city]; a city whose requests are all
            # done has no entry there and would raise KeyError in its thread.
            if city not in request_dic:
                continue
            threads_list.append(MyThread(fun_, city))
        for t in threads_list:
            # Set the attribute; assigning to t.setDaemon would only replace
            # the method object and leave the flag untouched.
            t.daemon = False
            t.start()
        for t in threads_list:
            t.join()
    
    
    # Start the crawl only when run as a script, not when imported.
    if __name__ == '__main__':
        main()
    

      

    import xlrd
    import time
    import sys
    import os
    import requests
    import sqlite3
    import threading
    
    # Resolve everything relative to this script so it can be started from any
    # working directory; the parent directory is also made importable.
    curPath = os.path.abspath(os.path.dirname(__file__))
    rootPath = os.path.split(curPath)[0]
    sys.path.append(rootPath)

    # MAX_USED_TIMES: ceiling compared against the today_used column of a key.
    # overrun_str:    message text kept for reference (unused in this script).
    # DB_KEY_EXHAUST: sentinel returned when no key is under the ceiling.
    MAX_USED_TIMES, overrun_str, DB_KEY_EXHAUST = 1900, '天配额超限,限制访问', 'DB_KEY_EXHAUST'

    # SQLite file that stores the per-key usage counters.
    db = os.path.join(curPath, 'py_bdspider_status.db')

    # Only every third line of the city file is a city name; spaces and the
    # line break are stripped (this variant appends no suffix).
    pcity_list = []
    pcity_file = os.path.join(curPath, '省会城市.txt')
    with open(pcity_file, 'r', encoding='utf-8') as pf:
        for line_no, line in enumerate(pf, start=1):
            if line_no % 3 == 0:
                pcity_list.append(line.replace(' ', '').replace('\n', ''))
    pcity_sorted_list = sorted(pcity_list)

    target_city_list_big = ['广州市', '厦门市', '深圳市', '北京市', '杭州市', '成都市', '上海市', '西安市']
    # Cities from the file that are not in the big-city list, in file order.
    target_city_list = [name for name in pcity_list if name not in target_city_list_big]
    
    # def db_init_key_table():
    #     conn = sqlite3.connect(db)
    #     c = conn.cursor()
    #     sql = 'DELETE  FROM  baidu_map_key_used'
    #     c.execute(sql)
    #     conn.commit()
    #     pcity_file = '%s\%s' % (curPath, 'bdmap_key.txt')
    #     with open(pcity_file, 'r', encoding='utf-8') as pf:
    #         c_ = 0
    #         for i in pf:
    #             if len(i) < 4:
    #                 continue
    #             author, key = i.replace('\n', '').split('\t')
    #             localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
    #             sql = 'INSERT INTO baidu_map_key_used (author,key,update_time,today_used) VALUES ("%s","%s","%s",%s) ' % (
    #                 author, key, localtime_, 0)
    #             c.execute(sql)
    #     conn.commit()
    #     conn.close()
    
    
    # db_init_key_table()
    # Keep only the cities from index 11 onward.
    # NOTE(review): presumably the first 11 are covered by another run - confirm.
    target_city_list = target_city_list[11:]
    
    
    def db_get_one_effective():
        """Return an API key that is still within its quota.

        Queries ``baidu_map_key_used`` in the database at ``db`` for rows with
        ``today_used <= MAX_USED_TIMES`` and takes the first row returned.

        Returns:
            The key value, or ``DB_KEY_EXHAUST`` when no row qualifies.
        """
        sql = 'SELECT key FROM baidu_map_key_used WHERE today_used<=? '
        conn = sqlite3.connect(db)
        try:
            res = conn.execute(sql, (MAX_USED_TIMES,)).fetchone()
        finally:
            # A connection is opened per call, so it must be closed on every
            # path or one handle leaks per request.
            conn.close()
        return DB_KEY_EXHAUST if res is None else res[0]
    
    
    def db_update_one_today_used(key):
        """Count one more request against ``key`` in ``baidu_map_key_used``.

        Increments ``today_used`` and sets ``update_time`` to the local time
        formatted as ``%y%m%d%H%M%S``.

        Args:
            key: Value of the ``key`` column identifying the row to update.
        """
        localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
        # Bound parameters instead of string formatting: no quoting problems
        # in the key, and the timestamp is passed as text like the value the
        # commented-out table seeding code inserts.
        sql = 'UPDATE baidu_map_key_used SET today_used = today_used+1 ,update_time=? WHERE key=?'
        conn = sqlite3.connect(db)
        try:
            conn.execute(sql, (localtime_, key))
            conn.commit()
        finally:
            conn.close()
    
    
    # Responses are stored under dir_, failed requests under dir_exception.
    dir_, dir_exception = 'baidu_map_uid', 'baidu_map_uid_exception'
    # Names (without '.txt') of the requests that already have a result file.
    requested_file_list = []
    # Both directory strings end with a path separator (the empty last
    # component adds it) because the writers build file names by plain
    # string concatenation.
    requested_file_dir_str = os.path.join(curPath, dir_, '')
    requested_file_dir_exception_str = os.path.join(curPath, dir_exception, '')
    # Directory listing taken once, at import time.
    requested_file_dir = os.listdir(requested_file_dir_str)
    
    
    def chk_if_requested_file():
        """Bring ``requested_file_list`` up to date with the result directory.

        Lists ``requested_file_dir_str`` on every call, so files written after
        start-up are seen, and appends each file name, minus a trailing
        ``.txt``, that is not in the list yet.
        """
        known = set(requested_file_list)  # O(1) membership instead of list scans
        for f in os.listdir(requested_file_dir_str):
            to_in = f[:-len('.txt')] if f.endswith('.txt') else f
            if to_in not in known:
                known.add(to_in)
                requested_file_list.append(to_in)
    
    
    # Fill requested_file_list once at import time, before the workbook is read.
    chk_if_requested_file()
    
    
    def write_requested_res(request_name, str_, type_='.txt'):
        """Store a response body in the result directory and log the success.

        Args:
            request_name: File name stem (directory and suffix are added here).
            str_: Text written to the file, encoded as UTF-8.
            type_: File suffix, ``'.txt'`` by default.
        """
        # '?' is removed from the whole path; some request names end in '?'.
        out_path = '{}{}{}'.format(requested_file_dir_str, request_name, type_).replace('?', '')
        with open(out_path, 'w', encoding='utf-8') as out_file:
            out_file.write(str_)
        print('ok', threading.get_ident(), request_name)
    
    
    def write_requested_exception_res(request_name, str_, type_='.txt'):
        """Store a failure marker in the exception directory.

        Args:
            request_name: File name stem (directory and suffix are added here).
            str_: Text written to the file, encoded as UTF-8.
            type_: File suffix, ``'.txt'`` by default.
        """
        # '?' is removed from the whole path; some request names end in '?'.
        out_path = '{}{}{}'.format(requested_file_dir_exception_str, request_name, type_).replace('?', '')
        with open(out_path, 'w', encoding='utf-8') as out_file:
            out_file.write(str_)
    
    
    # city -> district -> list of request names still to fetch
    # (filled by gen_request_dic_list).
    request_dic = {}
    
    
    def gen_request_dic_list():
        """Fill ``request_dic`` from the task workbook.

        Reads the first sheet of the ``.xlsx`` file next to this script and
        skips its first row. Only rows whose city is in ``target_city_list``
        are used. Requests whose ``city + district + request_name`` is not yet
        in ``requested_file_list`` are collected, without duplicates and in
        sheet order, as ``request_dic[city][district] -> [request_name, ...]``.

        Raises:
            ValueError: If a row does not have exactly nine columns.
        """
        fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
        FEXCEL = os.path.join(curPath, fname_source + '.xlsx')
        # NOTE(review): xlrd 2.x no longer opens .xlsx files - confirm the
        # installed xlrd version before running.
        table = xlrd.open_workbook(FEXCEL).sheets()[0]
        # Snapshots as sets: one hash lookup per row instead of a list scan.
        wanted_cities = set(target_city_list)
        already_requested = set(requested_file_list)
        for row_idx in range(1, table.nrows):
            dbid, area_code, name_, request_name, type_, city, district, addr, street = table.row_values(row_idx)
            if city not in wanted_cities:
                continue
            request_name_chk = '%s%s%s' % (city, district, request_name)
            if request_name_chk in already_requested:
                continue
            names = request_dic.setdefault(city, {}).setdefault(district, [])
            if request_name not in names:
                names.append(request_name)
    
    
    # Build the work list at import time.
    gen_request_dic_list()
    
    # Same workbook base name as the one used inside gen_request_dic_list.
    fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
    
    # URL template for the Baidu place-suggestion endpoint; fun_ substitutes the
    # R-QUERY, R-CITY and R-AK placeholders. Example of a filled-in request:
    # .../place/v2/suggestion?query=<name>&region=<city>&city_limit=true&output=json&ak=<key>
    # (the API key that used to be spelled out here has been redacted).
    base_url = 'http://api.map.baidu.com/place/v2/suggestion?query=R-QUERY&region=R-CITY&city_limit=true&output=json&ak=R-AK'
    
    
    def fun_(city):
        """Request and store the suggestion result for every pending name of ``city``.

        For each entry of ``request_dic[city]`` the requested-file list is
        refreshed, a key is taken from the database, the URL is built from
        ``base_url`` and the response text is written to the result directory.
        Processing of the city stops as soon as no usable key is left.

        Args:
            city: A key of ``request_dic``.
        """
        from urllib.parse import quote  # local import keeps the block self-contained

        for district in request_dic[city]:
            for request_name in request_dic[city][district]:
                request_name_chk = '%s%s%s' % (city, district, request_name)
                chk_if_requested_file()
                if request_name_chk in requested_file_list:
                    continue
                ak = db_get_one_effective()
                if ak == DB_KEY_EXHAUST:
                    print(DB_KEY_EXHAUST)
                    # Leave the whole function: a bare `break` would only end
                    # this district and query the database again for the next.
                    return
                # Percent-encode the values so '&', '#', '?' or spaces inside a
                # name cannot alter the query string.
                url_ = (base_url
                        .replace('R-QUERY', quote(str(request_name), safe=''))
                        .replace('R-CITY', quote(str(city), safe=''))
                        .replace('R-AK', ak))
                try:
                    # Without a timeout a stalled connection blocks the thread forever.
                    bd_res_json_str = requests.get(url_, timeout=30).text
                    db_update_one_today_used(ak)
                    write_requested_res(request_name_chk, bd_res_json_str)
                except Exception as exc:
                    # Deliberate best effort: record the failure and move on to
                    # the next request instead of ending the thread.
                    bd_res_json_str = '请求百度-异常'
                    write_requested_exception_res(request_name_chk, bd_res_json_str)
                    print(bd_res_json_str, repr(exc))
    
    
    class MyThread(threading.Thread):
        """Thread that calls ``func`` with ``args`` as its single argument."""

        def __init__(self, func, args):
            super().__init__()
            self.func = func
            self.args = args

        def run(self):
            """Invoke the stored callable with the stored argument."""
            callback, argument = self.func, self.args
            callback(argument)
    
    
    # Number of target cities, captured once at import time.
    thread_sum = len(target_city_list)
    
    
    def main():
        """Run ``fun_`` in one thread per target city and wait for all of them."""
        threads_list = []
        for city in target_city_list:
            # fun_ indexes request_dic[city]; cities without pending requests
            # have no entry there and are skipped.
            if city not in request_dic:
                continue
            threads_list.append(MyThread(fun_, city))
        for t in threads_list:
            # Set the attribute; assigning to t.setDaemon would only replace
            # the method object and leave the flag untouched.
            t.daemon = False
            t.start()
        for t in threads_list:
            t.join()
    
    
    # Start the crawl only when run as a script, not when imported.
    if __name__ == '__main__':
        main()
    import xlrd
    import time
    import sys
    import os
    import requests
    import sqlite3
    import threading
    
    # Resolve everything relative to this script so it can be started from any
    # working directory; the parent directory is also made importable.
    curPath = os.path.abspath(os.path.dirname(__file__))
    rootPath = os.path.split(curPath)[0]
    sys.path.append(rootPath)

    # MAX_USED_TIMES: ceiling compared against the today_used column of a key.
    # overrun_str:    message text kept for reference (unused in this script).
    # DB_KEY_EXHAUST: sentinel returned when no key is under the ceiling.
    MAX_USED_TIMES, overrun_str, DB_KEY_EXHAUST = 1900, '天配额超限,限制访问', 'DB_KEY_EXHAUST'

    # SQLite file that stores the per-key usage counters.
    db = os.path.join(curPath, 'py_bdspider_status.db')

    # Only every third line of the city file is a city name; spaces and the
    # line break are stripped and the suffix '市' is appended.
    pcity_list = []
    pcity_file = os.path.join(curPath, '省会城市.txt')
    with open(pcity_file, 'r', encoding='utf-8') as pf:
        for line_no, line in enumerate(pf, start=1):
            if line_no % 3 == 0:
                pcity_list.append(line.replace(' ', '').replace('\n', '') + '市')
    pcity_sorted_list = sorted(pcity_list)

    target_city_list_big = ['广州市', '厦门市', '深圳市', '北京市', '杭州市', '成都市', '上海市', '西安市']
    # Cities from the file that are not in the big-city list, in file order.
    target_city_list = [name for name in pcity_list if name not in target_city_list_big]
    
    # def db_init_key_table():
    #     conn = sqlite3.connect(db)
    #     c = conn.cursor()
    #     sql = 'DELETE  FROM  baidu_map_key_used'
    #     c.execute(sql)
    #     conn.commit()
    #     pcity_file = '%s\%s' % (curPath, 'bdmap_key.txt')
    #     with open(pcity_file, 'r', encoding='utf-8') as pf:
    #         c_ = 0
    #         for i in pf:
    #             if len(i) < 4:
    #                 continue
    #             author, key = i.replace('\n', '').split('\t')
    #             localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
    #             sql = 'INSERT INTO baidu_map_key_used (author,key,update_time,today_used) VALUES ("%s","%s","%s",%s) ' % (
    #                 author, key, localtime_, 0)
    #             c.execute(sql)
    #     conn.commit()
    #     conn.close()
    
    
    # db_init_key_table()
    # target_city_list = target_city_list[0:11]
    # target_city_list = target_city_list[0:11]
    # Keep only the cities from index 11 onward.
    # NOTE(review): presumably the first 11 are covered by another run - confirm.
    target_city_list =target_city_list[11:]
    
    def db_get_one_effective():
        """Return the least-used API key that is still within its quota.

        Queries ``baidu_map_key_used`` in the database at ``db`` for rows with
        ``today_used <= MAX_USED_TIMES``, ordered by ``today_used`` ascending,
        and takes the first row.

        Returns:
            The key value, or ``DB_KEY_EXHAUST`` when no row qualifies.
        """
        sql = 'SELECT key FROM baidu_map_key_used WHERE today_used<=? ORDER BY today_used ASC'
        conn = sqlite3.connect(db)
        try:
            res = conn.execute(sql, (MAX_USED_TIMES,)).fetchone()
        finally:
            # A connection is opened per call, so it must be closed on every
            # path or one handle leaks per request.
            conn.close()
        return DB_KEY_EXHAUST if res is None else res[0]
    
    
    def db_update_one_today_used(key):
        """Count one more request against ``key`` in ``baidu_map_key_used``.

        Increments ``today_used`` and sets ``update_time`` to the local time
        formatted as ``%y%m%d%H%M%S``.

        Args:
            key: Value of the ``key`` column identifying the row to update.
        """
        localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
        # Bound parameters instead of string formatting: no quoting problems
        # in the key, and the timestamp is passed as text like the value the
        # commented-out table seeding code inserts.
        sql = 'UPDATE baidu_map_key_used SET today_used = today_used+1 ,update_time=? WHERE key=?'
        conn = sqlite3.connect(db)
        try:
            conn.execute(sql, (localtime_, key))
            conn.commit()
        finally:
            conn.close()
    
    
    # Responses are stored under dir_, failed requests under dir_exception.
    dir_, dir_exception = 'baidu_map_uid', 'baidu_map_uid_exception'
    # Names (without '.txt') of the requests that already have a result file.
    requested_file_list = []
    # Both directory strings end with a path separator (the empty last
    # component adds it) because the writers build file names by plain
    # string concatenation.
    requested_file_dir_str = os.path.join(curPath, dir_, '')
    requested_file_dir_exception_str = os.path.join(curPath, dir_exception, '')
    # Directory listing taken once, at import time.
    requested_file_dir = os.listdir(requested_file_dir_str)
    
    
    def chk_if_requested_file():
        """Bring ``requested_file_list`` up to date with the result directory.

        Lists ``requested_file_dir_str`` on every call, so files written after
        start-up are seen, and appends each file name, minus a trailing
        ``.txt``, that is not in the list yet.
        """
        known = set(requested_file_list)  # O(1) membership instead of list scans
        for f in os.listdir(requested_file_dir_str):
            to_in = f[:-len('.txt')] if f.endswith('.txt') else f
            if to_in not in known:
                known.add(to_in)
                requested_file_list.append(to_in)
    
    
    # Fill requested_file_list once at import time, before the workbook is read.
    chk_if_requested_file()
    
    
    def write_requested_res(request_name, str_, type_='.txt'):
        """Store a response body in the result directory and log the success.

        Args:
            request_name: File name stem (directory and suffix are added here).
            str_: Text written to the file, encoded as UTF-8.
            type_: File suffix, ``'.txt'`` by default.
        """
        # '?' is removed from the whole path; some request names end in '?'.
        out_path = '{}{}{}'.format(requested_file_dir_str, request_name, type_).replace('?', '')
        with open(out_path, 'w', encoding='utf-8') as out_file:
            out_file.write(str_)
        print('ok', threading.get_ident(), request_name)
    
    
    def write_requested_exception_res(request_name, str_, type_='.txt'):
        """Store a failure marker in the exception directory.

        Args:
            request_name: File name stem (directory and suffix are added here).
            str_: Text written to the file, encoded as UTF-8.
            type_: File suffix, ``'.txt'`` by default.
        """
        # '?' is removed from the whole path; some request names end in '?'.
        out_path = '{}{}{}'.format(requested_file_dir_exception_str, request_name, type_).replace('?', '')
        with open(out_path, 'w', encoding='utf-8') as out_file:
            out_file.write(str_)
    
    
    # city -> district -> list of request names still to fetch
    # (filled by gen_request_dic_list).
    request_dic = {}
    
    
    def gen_request_dic_list():
        """Fill ``request_dic`` from the task workbook.

        Reads the first sheet of the ``.xlsx`` file next to this script and
        skips its first row. Only rows whose city is in ``target_city_list``
        are used. Requests whose ``city + district + request_name`` is not yet
        in ``requested_file_list`` are collected, without duplicates and in
        sheet order, as ``request_dic[city][district] -> [request_name, ...]``.

        Raises:
            ValueError: If a row does not have exactly nine columns.
        """
        fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
        FEXCEL = os.path.join(curPath, fname_source + '.xlsx')
        # NOTE(review): xlrd 2.x no longer opens .xlsx files - confirm the
        # installed xlrd version before running.
        table = xlrd.open_workbook(FEXCEL).sheets()[0]
        # Snapshots as sets: one hash lookup per row instead of a list scan.
        wanted_cities = set(target_city_list)
        already_requested = set(requested_file_list)
        for row_idx in range(1, table.nrows):
            dbid, area_code, name_, request_name, type_, city, district, addr, street = table.row_values(row_idx)
            if city not in wanted_cities:
                continue
            request_name_chk = '%s%s%s' % (city, district, request_name)
            if request_name_chk in already_requested:
                continue
            names = request_dic.setdefault(city, {}).setdefault(district, [])
            if request_name not in names:
                names.append(request_name)
    
    
    # Build the work list at import time.
    gen_request_dic_list()
    
    # Same workbook base name as the one used inside gen_request_dic_list.
    fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
    
    # URL template for the Baidu place-suggestion endpoint; fun_ substitutes the
    # R-QUERY, R-CITY and R-AK placeholders. Example of a filled-in request:
    # .../place/v2/suggestion?query=<name>&region=<city>&city_limit=true&output=json&ak=<key>
    # (the API key that used to be spelled out here has been redacted).
    base_url = 'http://api.map.baidu.com/place/v2/suggestion?query=R-QUERY&region=R-CITY&city_limit=true&output=json&ak=R-AK'
    
    
    def fun_(city):
        """Request and store the suggestion result for every pending name of ``city``.

        For each entry of ``request_dic[city]`` the requested-file list is
        refreshed, a key is taken from the database, the URL is built from
        ``base_url`` and the response text is written to the result directory.
        Processing of the city stops as soon as no usable key is left.

        Args:
            city: A key of ``request_dic``.
        """
        from urllib.parse import quote  # local import keeps the block self-contained

        for district in request_dic[city]:
            for request_name in request_dic[city][district]:
                request_name_chk = '%s%s%s' % (city, district, request_name)
                chk_if_requested_file()
                if request_name_chk in requested_file_list:
                    continue
                ak = db_get_one_effective()
                if ak == DB_KEY_EXHAUST:
                    print(DB_KEY_EXHAUST)
                    # Leave the whole function: a bare `break` would only end
                    # this district and query the database again for the next.
                    return
                # Percent-encode the values so '&', '#', '?' or spaces inside a
                # name cannot alter the query string.
                url_ = (base_url
                        .replace('R-QUERY', quote(str(request_name), safe=''))
                        .replace('R-CITY', quote(str(city), safe=''))
                        .replace('R-AK', ak))
                try:
                    # Without a timeout a stalled connection blocks the thread forever.
                    bd_res_json_str = requests.get(url_, timeout=30).text
                    db_update_one_today_used(ak)
                    write_requested_res(request_name_chk, bd_res_json_str)
                except Exception as exc:
                    # Deliberate best effort: record the failure and move on to
                    # the next request instead of ending the thread.
                    bd_res_json_str = '请求百度-异常'
                    write_requested_exception_res(request_name_chk, bd_res_json_str)
                    print(bd_res_json_str, repr(exc))
    
    
    class MyThread(threading.Thread):
        """Thread that calls ``func`` with ``args`` as its single argument."""

        def __init__(self, func, args):
            super().__init__()
            self.func = func
            self.args = args

        def run(self):
            """Invoke the stored callable with the stored argument."""
            callback, argument = self.func, self.args
            callback(argument)
    
    
    # Number of target cities, captured once at import time.
    thread_sum = len(target_city_list)
    
    
    def main():
        """Run ``fun_`` in one thread per target city and wait for all of them."""
        threads_list = []
        for city in target_city_list:
            # fun_ indexes request_dic[city]; a city whose requests are all
            # done has no entry there and would raise KeyError in its thread.
            if city not in request_dic:
                continue
            threads_list.append(MyThread(fun_, city))
        for t in threads_list:
            # Set the attribute; assigning to t.setDaemon would only replace
            # the method object and leave the flag untouched.
            t.daemon = False
            t.start()
        for t in threads_list:
            t.join()
    
    
    # Start the crawl only when run as a script, not when imported.
    if __name__ == '__main__':
        main()
    

      

  • 相关阅读:
    Eclipse智能提示及快捷键
    Activity生命周期
    【highlight.js】页面代码高亮插件
    【Flask】 flask-socketio实现WebSocket
    【treeview】 基于jQuery的简单树形插件
    【Zabbix】大规模监控误报发生时的处理方案
    【Java】 重拾Java入门
    【Flask】 网站的用户管理
    【Flask】 结合wtforms的文件上传表单
    【Python】 Web开发框架的基本概念与开发的准备工作
  • 原文地址:https://www.cnblogs.com/rsapaper/p/7450045.html
Copyright © 2011-2022 走看看