zoukankan      html  css  js  c++  java
  • 5.5

    Python爬取高德大数据各项数据(拥堵区域信息,拥堵路段信息,所有重点城市信息等)

    import datetime
    import json
    import threading
    import traceback
    import time
    from shlex import join
    
    from selenium import webdriver
    from selenium.webdriver import ChromeOptions
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver import Chrome
    import pymysql
    import requests
    from concurrent.futures import ThreadPoolExecutor
    from lxml import html
    
    import re
    # --- Headless Chrome configuration ------------------------------------
    option = ChromeOptions()
    option.add_experimental_option("excludeSwitches", ["enable-automation"])
    # No visible window; per the original author's note, start-up fails on
    # Linux hosts without a display unless this flag is set.
    option.add_argument('--headless')
    # Original author's note: Google's documentation mentions this flag as a
    # workaround for a bug.
    option.add_argument('--disable-gpu')
    option.add_argument('window-size=1920x3000')  # browser resolution
    option.add_argument('--hide-scrollbars')  # hide scrollbars for some special pages
    option.add_argument('blink-settings=imagesEnabled=false')  # do not load images, for speed
    # Explicit location of the browser binary.
    # NOTE(review): the original literal had lost its path separators
    # ("C:UsersLenovoAppData..."); restored to the usual per-user Chrome
    # install path - confirm it matches the target machine.
    option.binary_location = r"C:\Users\Lenovo\AppData\Local\Google\Chrome\Application\chrome.exe"


    # Shared browser instance; also used by qxcity1/qxcity2/qrcity/qccity.
    web = Chrome(options=option)

    etree = html.etree
    web.get("https://trp.autonavi.com/diagnosis/rank.do")

    # Chrome renders a raw JSON response inside a <pre> element; pull the JSON
    # text back out of the rendered page source.
    ul = 1
    js = web.page_source
    obj1 = re.compile(r'<html><head></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">(?P<ul>.*?)</pre></body></html>', re.S)
    result1 = obj1.finditer(js)
    for it in result1:
        ul = it.group('ul')
    if not isinstance(ul, str):
        # Fail with a readable message instead of json.loads(1) -> TypeError.
        raise RuntimeError('rank.do: JSON payload not found in page source')
    j = json.loads(ul)

    # Parallel lists, one entry per city, consumed by the functions below.
    MAX_CITIES = 101  # the original script read exactly entries 0..100
    code = []
    cityName = []
    jiankangzhishu = []
    yongduzhishu = []
    speed = []
    # Slicing (rather than indexing 0..100) avoids an IndexError when the
    # service returns fewer entries than expected.
    for city in j[:MAX_CITIES]:
        code.append(city["adcode"])
        cityName.append(city['cityName'])
        jiankangzhishu.append(city["healthValue"])
        yongduzhishu.append(city['idx1'])
        speed.append(city['realSpeed'])
    
    
    
    def get_conn():
        """Open a fresh connection to the local ``jtsk`` MySQL database.

        Returns:
            tuple: ``(connection, cursor)`` where the cursor was created from
            the returned connection. The caller is responsible for closing
            both.
        """
        settings = {
            "host": "localhost",
            "port": 3306,
            "user": "root",
            "password": "123456",
            "db": "jtsk",
            "charset": "utf8",
        }
        connection = pymysql.connect(**settings)
        return connection, connection.cursor()
    
    
    def close_conn(conn, cursor):
        """Close a cursor and its connection, tolerating missing handles.

        Args:
            conn: Object exposing ``close()`` (a DB connection), or ``None``.
            cursor: Object exposing ``close()`` (a DB cursor), or ``None``.

        The cursor is closed first. The connection is closed even when closing
        the cursor raises; in that case the cursor's exception propagates once
        the connection has been closed.
        """
        try:
            if cursor is not None:
                cursor.close()
        finally:
            # Runs regardless of the cursor outcome so the connection never leaks.
            if conn is not None:
                conn.close()
    
    def cityquyu():
        """Refresh per-district congestion rows for every scraped city.

        Clears the ``cityquyu`` table, then for each ``(code, cityName)`` pair
        requests ``districtRank.do`` and inserts one row per returned district
        into ``cityquyu`` and into ``cityquyu_all`` (which this function only
        appends to). Rows are committed once per city.

        Any exception is printed with its traceback and not re-raised; cities
        processed before the failure stay committed.
        """
        insert_current = "insert into cityquyu(city,name,zhishu,speed,time ) values(%s,%s,%s,%s,%s)"
        insert_history = "insert into cityquyu_all(city,name,zhishu,speed,time ) values(%s,%s,%s,%s,%s)"
        cursor = None
        conn = None
        try:
            tm = time.strftime("%Y-%m-%d %H:%M", time.localtime())
            # One connection serves the delete and both inserts; the original
            # opened and closed an extra connection for every single row.
            conn, cursor = get_conn()
            cursor.execute('delete from cityquyu')
            conn.commit()
            for adcode, cityname in zip(code, cityName):
                url = f'https://trp.autonavi.com/ajax/districtRank.do?linksType=1&cityCode={adcode}'
                # requests never times out by default; bound the wait so one
                # stalled request cannot hang the whole refresh cycle.
                resp = requests.get(url, timeout=30)
                rows = [
                    [cityname, item['name'], float(item['index']), float(item['speed']), tm]
                    for item in resp.json()
                ]
                if rows:
                    cursor.executemany(insert_current, rows)
                    cursor.executemany(insert_history, rows)
                    conn.commit()
        except Exception:
            traceback.print_exc()
        finally:
            close_conn(conn, cursor)
    def roadyuce():
        """Refresh the ``roadyuce`` table with tomorrow's per-city road data.

        Clears ``roadyuce``, then for each ``(code, cityName)`` pair requests
        ``getCityRoadTop.do`` for tomorrow's date and inserts one row per
        returned entry, committing once per city.

        Any exception is printed with its traceback and not re-raised.
        """
        sql = """
                            insert into roadyuce(name,roadname,zhishu,speed,dir,zuobiao) values(%s,%s,%s,%s,%s,%s)
                            """
        cursor = None
        conn = None
        try:
            # Tomorrow's date, sent as the ``date`` query parameter.
            tm = (datetime.datetime.now() + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
            conn, cursor = get_conn()
            cursor.execute('delete from roadyuce')
            conn.commit()
            for adcode, cityname in zip(code, cityName):
                url = f'https://trp.autonavi.com/ajax/getCityRoadTop.do?adcode={adcode}&date={tm}'
                # Bound the wait; requests has no default timeout.
                resp = requests.get(url, timeout=30)
                js = resp.json()
                # NOTE(review): the original loop bound (len(js) - 1) skips the
                # last element of the response; kept as-is - confirm whether
                # that entry is really meant to be dropped.
                rows = [
                    [cityname, road["cityName"], road["idx"], road["speed"],
                     road["dir"], str(road["lnglats"])]
                    for road in js[:-1]
                ]
                if rows:
                    cursor.executemany(sql, rows)
                    conn.commit()
        except Exception:
            traceback.print_exc()
        finally:
            close_conn(conn, cursor)
    def qxcity1(dt='2021-05-11'):
        """Replace the ``qxcity1`` table with the nationwide inbound city list.

        Loads ``inAndOutCity.do`` (``inOut=IN``, ``size=50``) in the shared
        browser ``web``, extracts the JSON rendered inside ``<pre>`` and stores
        ``adcode``, ``name`` and ``willIdx`` of every entry.

        Args:
            dt: Value of the ``dt`` query parameter. Defaults to the date that
                was previously hard-coded.

        The page is fetched and parsed before the table is touched, and the
        delete and inserts share one transaction, so a failed scrape leaves
        the existing rows in place. Any exception is printed with its
        traceback and not re-raised.
        """
        cursor = None
        conn = None
        try:
            url = f'https://trp.autonavi.com/cityTravel/inAndOutCity.do?adcode=100000&dt={dt}&willReal=WILL&inOut=IN&size=50'
            web.get(url)
            pattern = re.compile(
                r'<html><head></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">(?P<ul>.*?)</pre></body></html>',
                re.S)
            payloads = pattern.findall(web.page_source)
            if not payloads:
                # Previously surfaced as json.loads(1) -> TypeError.
                raise ValueError('inAndOutCity.do: JSON payload not found in page source')
            rows = [
                [city["adcode"], city["name"], city["willIdx"]]
                for city in json.loads(payloads[-1])
            ]

            conn, cursor = get_conn()
            cursor.execute('delete from qxcity1')
            if rows:
                cursor.executemany("insert into qxcity1(code,name,zhishu) values(%s,%s,%s)", rows)
            conn.commit()
        except Exception:
            traceback.print_exc()
        finally:
            close_conn(conn, cursor)
    def qxcity2(dt='2021-05-11'):
        """Replace the ``qxcity2`` table with the nationwide outbound city list.

        Loads ``inAndOutCity.do`` (``inOut=OUT``, ``size=50``) in the shared
        browser ``web``, extracts the JSON rendered inside ``<pre>`` and stores
        ``adcode``, ``name`` and ``willIdx`` of every entry.

        Args:
            dt: Value of the ``dt`` query parameter. Defaults to the date that
                was previously hard-coded.

        The page is fetched and parsed before the table is touched, and the
        delete and inserts share one transaction, so a failed scrape leaves
        the existing rows in place. Any exception is printed with its
        traceback and not re-raised.
        """
        cursor = None
        conn = None
        try:
            url = f'https://trp.autonavi.com/cityTravel/inAndOutCity.do?adcode=100000&dt={dt}&willReal=WILL&size=50&inOut=OUT'
            web.get(url)
            pattern = re.compile(
                r'<html><head></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">(?P<ul>.*?)</pre></body></html>',
                re.S)
            payloads = pattern.findall(web.page_source)
            if not payloads:
                # Previously surfaced as json.loads(1) -> TypeError.
                raise ValueError('inAndOutCity.do: JSON payload not found in page source')
            rows = [
                [city["adcode"], city["name"], city["willIdx"]]
                for city in json.loads(payloads[-1])
            ]

            conn, cursor = get_conn()
            cursor.execute('delete from qxcity2')
            if rows:
                cursor.executemany("insert into qxcity2(code,name,zhishu) values(%s,%s,%s)", rows)
            conn.commit()
        except Exception:
            traceback.print_exc()
        finally:
            close_conn(conn, cursor)
    def cityroad():
        """Refresh per-road congestion rows for every scraped city.

        Clears the ``cityroad`` table, then for each ``(code, cityName)`` pair
        requests ``roadRank.do`` and inserts one row per entry of its
        ``tableData`` list into ``cityroad`` and into ``cityroad_all`` (which
        this function only appends to). Rows are committed once per city.

        Any exception is printed with its traceback and not re-raised; cities
        processed before the failure stay committed.
        """
        sql = """
                    insert into cityroad(city,name,dir,zhishu,speed,length,time,coords) values(%s,%s,%s,%s,%s,%s,%s,%s)
                    """
        sql1 = """
                    insert into cityroad_all(city,name,dir,zhishu,speed,length,time,coords) values(%s,%s,%s,%s,%s,%s,%s,%s)
                    """
        cursor = None
        conn = None
        try:
            tm = time.strftime("%Y-%m-%d %H:%M", time.localtime())
            # One connection serves the delete and both inserts; the original
            # opened and closed an extra connection for every single row.
            conn, cursor = get_conn()
            cursor.execute('delete from cityroad')
            conn.commit()
            for adcode, cityname in zip(code, cityName):
                url = f'https://trp.autonavi.com/ajax/roadRank.do?roadType=0&timeType=0&cityCode={adcode}'
                # Bound the wait; requests has no default timeout.
                resp = requests.get(url, timeout=30)
                js = resp.json()['tableData']
                # NOTE(review): the original loop bound (len(js) - 1) skips the
                # last element of tableData; kept as-is - confirm whether that
                # entry is really meant to be dropped.
                rows = [
                    [cityname, road["name"], road["dir"], road["index"],
                     road["speed"], road["length"], tm, str(road["coords"])]
                    for road in js[:-1]
                ]
                if rows:
                    cursor.executemany(sql, rows)
                    cursor.executemany(sql1, rows)
                    conn.commit()
        except Exception:
            traceback.print_exc()
        finally:
            close_conn(conn, cursor)
    
    def qrcity(dt='2021-05-11'):
        """Replace the ``qrcity`` table with inbound city pairs.

        Loads the nationwide ``inAndOutCity.do`` list (``inOut=IN``,
        ``size=50``) in the shared browser ``web``; for every city in it loads
        that city's own ``inOut=IN`` list (``size=20``) and stores one
        ``(city name, related city name)`` row per entry, at most 20 per city.

        Args:
            dt: Value of the ``dt`` query parameter used for every request.
                Defaults to the date that was previously hard-coded.

        All pages are fetched before the table is touched, and the delete and
        inserts share one transaction, so a failed scrape leaves the existing
        rows in place. Any exception is printed with its traceback and not
        re-raised.
        """
        # Compiled once; the original recompiled it for every city.
        pattern = re.compile(
            r'<html><head></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">(?P<ul>.*?)</pre></body></html>',
            re.S)

        def _fetch_json(target):
            """Load ``target`` in the browser and decode the JSON shown in <pre>."""
            web.get(target)
            payloads = pattern.findall(web.page_source)
            if not payloads:
                # Previously surfaced as json.loads(1) -> TypeError.
                raise ValueError(f'JSON payload not found in page source: {target}')
            return json.loads(payloads[-1])

        cursor = None
        conn = None
        try:
            url = f'https://trp.autonavi.com/cityTravel/inAndOutCity.do?adcode=100000&dt={dt}&willReal=WILL&inOut=IN&size=50'
            rows = []
            for city in _fetch_json(url):
                url1 = f'https://trp.autonavi.com/cityTravel/inAndOutCity.do?adcode={city["adcode"]}&dt={dt}&willReal=WILL&size=20&inOut=IN'
                related = _fetch_json(url1)
                # Slice instead of indexing 0..19: a city with fewer than 20
                # entries used to raise IndexError and abort the whole run.
                rows.extend([city["name"], other["name"]] for other in related[:20])

            conn, cursor = get_conn()
            cursor.execute('delete from qrcity')
            if rows:
                cursor.executemany("insert into qrcity(city,name) values(%s,%s)", rows)
            conn.commit()
        except Exception:
            traceback.print_exc()
        finally:
            close_conn(conn, cursor)
    def qccity(dt='2021-05-11'):
        """Replace the ``qccity`` table with outbound city pairs.

        Loads the nationwide ``inAndOutCity.do`` list (``inOut=OUT``,
        ``size=50``) in the shared browser ``web``; for every city in it loads
        that city's own ``inOut=OUT`` list (``size=20``) and stores one
        ``(city name, related city name)`` row per entry, at most 20 per city.

        Args:
            dt: Value of the ``dt`` query parameter used for every request.
                Defaults to the date that was previously hard-coded.

        All pages are fetched before the table is touched, and the delete and
        inserts share one transaction, so a failed scrape leaves the existing
        rows in place. Any exception is printed with its traceback and not
        re-raised.
        """
        # Compiled once; the original recompiled it for every city.
        pattern = re.compile(
            r'<html><head></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">(?P<ul>.*?)</pre></body></html>',
            re.S)

        def _fetch_json(target):
            """Load ``target`` in the browser and decode the JSON shown in <pre>."""
            web.get(target)
            payloads = pattern.findall(web.page_source)
            if not payloads:
                # Previously surfaced as json.loads(1) -> TypeError.
                raise ValueError(f'JSON payload not found in page source: {target}')
            return json.loads(payloads[-1])

        cursor = None
        conn = None
        try:
            url = f'https://trp.autonavi.com/cityTravel/inAndOutCity.do?adcode=100000&dt={dt}&willReal=WILL&size=50&inOut=OUT'
            rows = []
            for city in _fetch_json(url):
                url1 = f'https://trp.autonavi.com/cityTravel/inAndOutCity.do?adcode={city["adcode"]}&dt={dt}&willReal=WILL&inOut=OUT&size=20'
                related = _fetch_json(url1)
                # Slice instead of indexing 0..19: a city with fewer than 20
                # entries used to raise IndexError and abort the whole run.
                rows.extend([city["name"], other["name"]] for other in related[:20])

            conn, cursor = get_conn()
            cursor.execute('delete from qccity')
            if rows:
                cursor.executemany("insert into qccity(city,name) values(%s,%s)", rows)
            conn.commit()
        except Exception:
            traceback.print_exc()
        finally:
            close_conn(conn, cursor)
    def update_city():
        """Rewrite the ``city`` table from the module-level city lists.

        Inserts one row per entry of the parallel lists ``code``, ``cityName``,
        ``jiankangzhishu``, ``yongduzhishu`` and ``speed``, stamped with the
        current local time. The delete and the inserts share one transaction,
        so the table is never left empty by a failed insert.

        NOTE(review): the lists are filled once at import time and are not
        re-fetched here, so repeated calls store the same values with a new
        timestamp - confirm this is intended.

        Any exception is printed with its traceback and not re-raised.
        """
        sql = "insert into city(code,cityName,jiankangzhishu,yongduzhishu,speed,time) values(%s,%s,%s,%s,%s,%s)"
        cursor = None
        conn = None
        try:
            tm = time.strftime("%Y-%m-%d %H:%M", time.localtime())
            # zip walks the lists in lockstep, removing the hard-coded 0..100
            # index range that raised IndexError on shorter lists.
            rows = [
                [adcode, name, health, congestion, avg_speed, tm]
                for adcode, name, health, congestion, avg_speed
                in zip(code, cityName, jiankangzhishu, yongduzhishu, speed)
            ]
            conn, cursor = get_conn()
            cursor.execute('delete from city')
            if rows:
                cursor.executemany(sql, rows)
            conn.commit()
        except Exception:
            traceback.print_exc()
        finally:
            close_conn(conn, cursor)
    
    
    def allrw():
        """Run one refresh cycle and schedule the next one 300 seconds later.

        Runs ``update_city``, ``cityquyu``, ``cityroad`` and ``roadyuce`` in
        that order and prints start/end timestamps plus the elapsed time.

        NOTE(review): the timer is started before the work begins and fires on
        its own thread, so a cycle that takes longer than 300 seconds overlaps
        with the next one - confirm this is acceptable.
        """
        print('===================================================================================')

        thr = threading.Timer(300, allrw)  # every 5 minutes
        thr.start()

        now1 = datetime.datetime.now()
        print(f'{now1} ----- 开始执行')
        update_city()
        cityquyu()
        cityroad()
        roadyuce()
        now2 = datetime.datetime.now()
        print(f'{now2} ----- 执行结束                       用时{now2 - now1}')
        # The original literal was split across physical lines (an unterminated
        # string, i.e. a SyntaxError); the line breaks are written as escapes.
        print('===================================================================================\n\n\n')
    # Script entry point: run the first refresh cycle (which schedules the
    # following ones itself), then report that it has finished.
    if __name__ == '__main__':
        allrw()
        print("ok")
  • 相关阅读:
    LCA+线段树/树状数组 POJ2763 Housewife Wind
    图论 洛谷P2052 道路修建
    动态规划 洛谷P2365 任务安排
    GCD问题 洛谷P1372 又是毕业季I & P1414 又是毕业季II
    动态规划 洛谷P1140 相似基因
    动态规划 洛谷P1868 饥饿的奶牛
    动态规划 P1280 尼克的任务
    倍增LCA BZOJ1776 cowpol奶牛政坛
    P1416 攻击火星
    搜索 洛谷 P1434滑雪
  • 原文地址:https://www.cnblogs.com/zhaoyids/p/14905353.html
Copyright © 2011-2022 走看看