zoukankan      html  css  js  c++  java
  • 数据爬取

    import pymysql
    import time
    import json
    import traceback
    import sys
    import requests
    import io

    sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='utf-8')
    def get_tencent_data():
    """
    :return: 返回历史数据和当日详细数据
    """
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_other' #详细
    url2='https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5' #历史
    headers = {
    'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Mobile Safari/537.36',
    'Referer':' https://news.qq.com/zt2020/page/feiyan.htm'
    }
    r = requests.get(url, headers)
    r2=requests.get(url2,headers)
    res = json.loads(r.text) # json字符串转字典
    res2=json.loads(r2.text)
    data_all = json.loads(res['data'])
    data_all2=json.loads(res2['data'])
    print(data_all.keys())
    history = {} # 历史数据
    for i in data_all["chinaDayList"]:
    ds = "2020." + i["date"]
    tup = time.strptime(ds, "%Y.%m.%d")
    ds = time.strftime("%Y-%m-%d", tup) # 改变时间格式,不然插入数据库会报错,数据库是datetime类型
    confirm = i["confirm"]
    suspect = i["suspect"]
    heal = i["heal"]
    dead = i["dead"]
    history[ds] = {"confirm": confirm, "suspect": suspect, "heal": heal, "dead": dead}
    for i in data_all2["chinaDayAddList"]:
    ds = "2020." + i["date"]
    tup = time.strptime(ds, "%Y.%m.%d")
    ds = time.strftime("%Y-%m-%d", tup)
    confirm = i["confirm"]
    suspect = i["suspect"]
    heal = i["heal"]
    dead = i["dead"]
    history[ds].update({"confirm_add": confirm, "suspect_add": suspect, "heal_add": heal, "dead_add": dead})



    details = [] # 当日详细数据
    update_time = data_all2["lastUpdateTime"]

    data_country = data_all2["areaTree"] # list 25个国
    data_province = data_country[0]["children"] # 中国各省
    for pro_infos in data_province:
    province = pro_infos["name"] # 省名
    for city_infos in pro_infos["children"]:
    city = city_infos["name"]
    confirm = city_infos["total"]["confirm"]
    confirm_add = city_infos["today"]["confirm"]
    heal = city_infos["total"]["heal"]
    dead = city_infos["total"]["dead"]
    details.append([update_time,province, city, confirm, confirm_add, heal, dead])
    #print(list(history.keys())[0])
    return history,foreign ,details



    def get_conn():
    """
    :return: 连接,游标l
    """
    # 创建连接
    conn = pymysql.connect(host="localhost",
    user="root",
    password="123456",
    db="cov",
    charset="utf8")
    # 创建游标
    cursor = conn.cursor() # 执行完毕返回的结果集默认以元组显示
    return conn, cursor


    def close_conn(conn, cursor):
    if cursor:
    cursor.close()
    if conn:
    conn.close()

    def update_details():
    """
    更新 details 表
    :return:
    """
    cursor = None
    conn = None
    try:
    li = get_tencent_data()[2] # 0 是历史数据字典,1 最新详细数据列表
    conn, cursor = get_conn()
    sql = "insert into details(update_time,province,city,confirm,confirm_add,heal,dead) values(%s,%s,%s,%s,%s,%s,%s)"
    sql_query = 'select %s=(select update_time from details order by id desc limit 1)' #对比当前最大时间戳
    cursor.execute(sql_query,li[0][0])
    if not cursor.fetchone()[0]:
    print(f"{time.asctime()}开始更新最新数据")
    for item in li:
    cursor.execute(sql, item)
    conn.commit() # 提交事务 update delete insert操作
    print(f"{time.asctime()}更新最新数据完毕")
    else:
    print(f"{time.asctime()}已是最新数据!")
    except:
    traceback.print_exc()
    finally:
    close_conn(conn, cursor)
    #[ds,country, confirm, suspect, heal, dead]
    def update_foreign():
    """
    更新 details 表
    :return:
    """
    #update_time,country,continent,confirm,dead,suspect,heal
    cursor = None
    conn = None
    try:
    li = get_tencent_data()[2] # 0 是历史数据字典,1 最新详细数据列表
    conn, cursor = get_conn()
    sql = "insert into foreign(update_time,country,continent,confirm,dead,suspect,heal) values(%s,%s,%s,%s,%s,%s,%s)"
    sql_query = 'select %s=(select update_time from foreign order by id desc limit 1)' #对比当前最大时间戳
    cursor.execute(sql_query,li[0][0])
    if not cursor.fetchone()[0]:
    print(f"{time.asctime()}开始更新最新数据")
    for item in li:
    cursor.execute(sql, item)
    conn.commit() # 提交事务 update delete insert操作
    print(f"{time.asctime()}更新最新数据完毕")
    else:
    print(f"{time.asctime()}已是最新数据!")
    except:
    traceback.print_exc()
    finally:
    close_conn(conn, cursor)
    def insert_history():
    """
    插入历史数据
    :return:
    """
    cursor = None
    conn = None
    try:
    dic = get_tencent_data()[0] # 0 是历史数据字典,1 最新详细数据列表
    print(f"{time.asctime()}开始插入历史数据")
    conn, cursor = get_conn()
    sql = "insert into history values(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    for k, v in dic.items():
    # item 格式 {'2020-01-13': {'confirm': 41, 'suspect': 0, 'heal': 0, 'dead': 1}
    cursor.execute(sql, [k, v.get("confirm"), v.get("confirm_add"), v.get("suspect"),
    v.get("suspect_add"), v.get("heal"), v.get("heal_add"),
    v.get("dead"), v.get("dead_add")])

    conn.commit() # 提交事务 update delete insert操作
    print(f"{time.asctime()}插入历史数据完毕")
    except:
    traceback.print_exc()
    finally:
    close_conn(conn, cursor)
    def update_history():
    """
    更新历史数据
    :return:
    """
    cursor = None
    conn = None
    try:
    dic = get_tencent_data()[0] # 0 是历史数据字典,1 最新详细数据列表
    print(f"{time.asctime()}开始更新历史数据")
    conn, cursor = get_conn()
    sql = "insert into history values(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    sql_query = "select confirm from history where update_time=%s"
    for k, v in dic.items():
    # item 格式 {'2020-01-13': {'confirm': 41, 'suspect': 0, 'heal': 0, 'dead': 1}
    if not cursor.execute(sql_query, k):
    cursor.execute(sql, [k, v.get("confirm"), v.get("confirm_add"), v.get("suspect"),
    v.get("suspect_add"), v.get("heal"), v.get("heal_add"),
    v.get("dead"), v.get("dead_add")])
    conn.commit() # 提交事务 update delete insert操作
    print(f"{time.asctime()}历史数据更新完毕")
    except:
    traceback.print_exc()
    finally:
    close_conn(conn, cursor)
    update_details()
    update_history()
  • 相关阅读:
    Python统计字符串中出现次数最多的人名
    初探CORBA组件化编程
    shell脚本—基础知识,变量
    Java多线程--线程交替
    Qt中采用多线程实现Socket编程
    Python字符串格式化--formate()的应用
    JAVA中浅复制与深复制
    Python这些问题你会吗?
    PHP控制反转(IOC)和依赖注入(DI
    Go 语言指针
  • 原文地址:https://www.cnblogs.com/yanwenhui/p/13087006.html
Copyright © 2011-2022 走看看