zoukankan      html  css  js  c++  java
  • python之天气爬虫

    代码已调试通过

    # Import standard-library modules (random, re, time) and third-party packages (pandas, requests)
    import random
    import re
    import time
    import pandas as pd
    import requests
    
    # Build the HTTP request headers sent with every download.
    # Header names must be spelled exactly; the misspelled 'Accept -Enconding'
    # and 'conection' keys were being sent as unrelated, meaningless headers.
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip,deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0  (windows NT 6.1;  WOW64)  AppleWebKit/537.36 (KHTML,like Gecko) Chrome/63.0.3236.0 '
                      'Safari/537.36 '
    }
    # Build the list of monthly history URLs to fetch.
    # Only years up to and including 2016 produce URLs; for each later year a
    # single "no weather data" notice is printed and the year is skipped.
    urls = []
    url_template = 'http://tianqi.2345.com/t/wea_history/js/58362_%s%s.js'
    for year in range(2012, 2019):
        if year > 2016:
            print("未获取天气数据")
            continue
        for month in range(1, 13):
            # The month is deliberately not zero-padded (e.g. 20121, 201212).
            urls.append(url_template % (year, month))
    # Download every monthly page and parse it into one DataFrame per month.
    info = []
    for url in urls:
        # Send the request and keep the response body as text. The timeout
        # prevents a stalled connection from hanging the whole run.
        response = requests.get(url, headers=headers, timeout=30).text
        print(response)

        # Pull each field out of the response text with a regular expression.
        city = re.findall("city:'(.*?)',", response)  # city
        ymd = re.findall("ymd:'(.*?)',", response)  # date
        # The leading comma keeps these two patterns from also matching the
        # avgbWendu / avgyWendu fields.
        high = re.findall(",bWendu:'(.*?)',", response)  # daily high temperature
        low = re.findall(",yWendu:'(.*?)',", response)  # daily low temperature
        tianqi = re.findall("tianqi:'(.*?)',", response)  # weather condition
        fengxiang = re.findall("fengxiang:'(.*?)',", response)  # wind direction
        aqi = re.findall("aqi:'(.*?)',", response)  # air-quality index
        aqiInfo = re.findall("aqiInfo:'(.*?)',", response)  # air-quality description
        aqiLevel = re.findall("aqiLevel:'(.*?)'}", response)  # air-quality level
        maxWendu = re.findall("maxWendu:'(.*?)',", response)  # maximum temperature
        # Previously this reused the maxWendu pattern, so the minimum column
        # was a copy of the maximum.
        minWendu = re.findall("minWendu:'(.*?)',", response)  # minimum temperature
        avgbWendu = re.findall("avgbWendu:'(.*?)',", response)  # average daytime temperature

        # Per the original author's note, 2012-2015 have no air-quality data,
        # so those pages are handled separately: fengli and avgyWendu are
        # matched up to a closing brace instead of a comma.
        if len(aqi) == 0:
            fengli = re.findall("fengli:'(.*?)'}", response)  # wind force
            avgyWendu = re.findall("avgyWendu:'(.*?)'}", response)  # average night temperature
            # Placeholder values for the air-quality columns.
            aqi = ''
            aqiInfo = ''
            aqiLevel = ''
        else:
            fengli = re.findall("fengli:'(.*?)',", response)  # wind force
            avgyWendu = re.findall("avgyWendu:'(.*?)',", response)  # average night temperature

        # orient='index' accepts fields of unequal length (one row per field);
        # transposing then yields one column per field.
        df = pd.DataFrame.from_dict(
            {'city': city, 'ymd': ymd, 'high': high, 'low': low, 'tianqi': tianqi, 'fengxiang': fengxiang,
             'fengli': fengli, 'aqi': aqi,
             'aqiInfo': aqiInfo, 'aqiLevel': aqiLevel, 'maxWendu': maxWendu, 'minWendu': minWendu,
             'avgbWendu': avgbWendu, 'avgyWendu': avgyWendu}, orient='index')
        info.append(df.transpose())

        # Pause a random 3-6 seconds after every request. The original drew
        # the random number but discarded it and slept once after the loop.
        time.sleep(random.randint(3, 6))
    # Merge all of the stored monthly tables into a single table.
    weather = pd.concat(info)
    # Export the data to a CSV file whose name carries the current local time.
    # The timestamp gets its own name so the `time` module is not overwritten
    # by a string (the original assigned the result back to `time`).
    timestamp = time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))
    weather.to_csv('weather_new' + timestamp + '.csv', index=False)
    

      运行结果如下:

  • 相关阅读:
    BZOJ 1731: [Usaco2005 dec]Layout 排队布局
    P2294 [HNOI2005]狡猾的商人
    P1993 小K的农场
    P1250 种树
    TCP/IP的排头兵――地址解析协议(ARP) (转载)
    "git rm" 和 "rm" 的区别(转载)
    Android 在eclipse中没有出现AVD的解决方法(转载)
    浅谈C++多态性(转载)
    Ubuntu搭建Eclipse+JDK+SDK的Android (转载)
    .gitignore(转载)
  • 原文地址:https://www.cnblogs.com/mumianhuasayyes/p/15802301.html
Copyright © 2011-2022 走看看