zoukankan      html  css  js  c++  java
  • Python之爬取天气预报并生成图表

      使用Python爬虫去天气预报网站爬取天气数据存储至MySQL然后使用pyecharts实现绘图

      本次代码可以在gitee下载https://gitee.com/liuyueming/weatherSpider.git

      一,环境查看

      Python版本

    C:\Users\liuym\Desktop\weatherSpider>python --version
    Python 3.6.6
    

      MySQL版本

     mysql --version
    mysql  Ver 14.14 Distrib 5.7.22, for Linux (x86_64) using  EditLine wrapper
    

      二,代码

      安装模块

    pip3 install pymysql
    pip3 install bs4
    pip3 install lxml
    pip3 install requests
    pip3 install pyecharts
    

      运行过程中遇到没有安装的库使用pip install安装即可

      本次爬取的天气预报网站为 http://www.tianqihoubao.com/  

      主程序main.py

    import pymysql
    import requests
    from bs4 import BeautifulSoup
     
    # Module-level MySQL connection and cursor; adjust the credentials to
    # match the local database before running.
    db = pymysql.connect(
        host="localhost",
        user="root",
        passwd="123456",
        db="weather",
        charset='utf8',
    )
    cursor = db.cursor()
     
    def get_html(url, timeout=10):
        """Download *url* and return it parsed as a BeautifulSoup tree.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot hang the crawl forever.

        Returns:
            A ``BeautifulSoup`` object built with the ``lxml`` parser.

        Raises:
            requests.RequestException: On a network failure, a timeout, or
                an HTTP error status (4xx/5xx).
        """
        response = requests.get(url, timeout=timeout)
        # Fail loudly on an error status instead of parsing an error page.
        response.raise_for_status()
        # Decode with the charset detected from the body content.
        response.encoding = response.apparent_encoding
        return BeautifulSoup(response.text, 'lxml')
     
    year = ['2020']  # years to crawl

    month = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']

    # One 'YYYYMM' key per monthly history page.
    time = [y + x for y in year for x in month]

    sql = """insert into weather_spider(time_local, link, weather_type, temperature, wind_power) \
                    values(%s, %s, %s, %s, %s)"""


    def _cell_text(cell):
        """Return the cell's text with surrounding whitespace, spaces and CRLF removed."""
        return cell.get_text().strip().replace(' ', '').replace('\r\n', '')


    try:
        for date in time:
            url = 'http://www.tianqihoubao.com/lishi/nanchang/month/' + date + '.html'
            soup = get_html(url)
            table = soup.find('table', attrs={'class': 'b'})
            if table is None:
                # No data table on this page: report it and move on rather
                # than crashing with AttributeError on None.
                print('未找到天气数据表,跳过: ' + url)
                continue

            rows = []
            # The first <tr> is skipped (presumably the header row).
            for tr in table.find_all('tr')[1:]:
                td = tr.find_all('td')
                anchor = td[0].find('a')
                rows.append((
                    anchor['title'],      # time_local: title of the day's link
                    anchor['href'],       # link: href of the day's link
                    _cell_text(td[1]),    # weather_type
                    _cell_text(td[2]),    # temperature
                    _cell_text(td[3]),    # wind_power
                ))

            # One batched insert and one commit per month instead of per row.
            cursor.executemany(sql, rows)
            db.commit()
    finally:
        # Always release the cursor and the connection, even if a page fails
        # (the original `db.close` without parentheses never closed anything).
        cursor.close()
        db.close()
    print('爬取完成')
    

      代码解析

    db = pymysql.connect(host="localhost", user="root", passwd="123456", db="weather", charset='utf8' ) # 数据库连接信息,根据实际情况修改
    

      

    year = ['2020'] # 需要爬取的年份信息
    

      

    url = 'http://www.tianqihoubao.com/lishi/nanchang/month/'+ date +'.html' # 需要爬取的城市信息 本次为南昌
    

      生成html程序myVisualize.py

    import pymysql
    import pyecharts.options as opts
    from pyecharts.charts import Line, Pie
    
    def create_temp():
        """Render the daily high/low temperature line chart as an HTML file.

        Reads ``time_local`` and ``temperature`` from every row of the
        ``weather_spider`` table, splits each temperature string on ``/``
        into a high and a low value (with the ``℃`` unit removed), and writes
        the chart to ``南昌2020气温变化表.html`` in the current directory.

        Raises:
            pymysql.MySQLError: If the database cannot be reached or queried.
        """
        db = pymysql.connect(host="localhost", user="root", passwd="123456", db="weather", charset='utf8')
        try:
            # The cursor context manager closes the cursor before the
            # connection is closed in the ``finally`` below.
            with db.cursor() as cursor:
                # Name the columns so the code does not depend on their
                # position in the table.
                cursor.execute('SELECT time_local, temperature FROM weather_spider;')
                data = cursor.fetchall()
        finally:
            db.close()

        max_temp_list = []
        min_temp_list = []
        day_list = []
        for time_local, temperature in data:
            # temperature is stored as '<high>/<low>'.
            parts = temperature.split('/')
            max_temp_list.append(parts[0].replace('℃', ''))
            min_temp_list.append(parts[1].replace('℃', ''))
            # Keep only the first 11 characters of the stored title as the label.
            day_list.append(time_local[:11])

        line = Line()
        line.add_xaxis(day_list)
        # Both series share the same decoration: max/min markers and an
        # average line.
        for series_name, values in (("最高气温", max_temp_list), ("最低气温", min_temp_list)):
            line.add_yaxis(
                series_name=series_name,
                y_axis=values,
                is_symbol_show=False,
                markpoint_opts=opts.MarkPointOpts(
                    data=[
                        opts.MarkPointItem(type_="max", name="最大值"),
                        opts.MarkPointItem(type_="min", name="最小值"),
                    ]
                ),
                markline_opts=opts.MarkLineOpts(
                    data=[opts.MarkLineItem(type_="average", name="平均值")]
                ),
            )
        line.set_global_opts(
            yaxis_opts=opts.AxisOpts(name="温度(℃)"),
            title_opts=opts.TitleOpts(title="南昌气温变化表"),
            tooltip_opts=opts.TooltipOpts(trigger="axis"),
        )

        line.render('南昌2020气温变化表.html')
        print('气温图生成成功')
    
    def create_weather():
        """Render the weather-type share pie chart as an HTML file.

        For each keyword in ``attr``, counts the ``weather_spider`` rows
        whose ``weather_type`` contains that keyword, then writes the pie
        chart to ``南昌2020天气占比表.html`` in the current directory. A row
        whose ``weather_type`` contains several keywords is counted once
        for each of them.

        Raises:
            pymysql.MySQLError: If the database cannot be reached or queried.
        """
        attr = ["雨", "多云", "晴", "阴", "雪", "雾", "霾"]
        counts = []
        db = pymysql.connect(host="localhost", user="root", passwd="123456", db="weather", charset='utf8')
        try:
            # The cursor context manager closes the cursor before the
            # connection is closed in the ``finally`` below.
            with db.cursor() as cursor:
                for keyword in attr:
                    # Let MySQL count the rows instead of relying on the
                    # driver-specific return value of execute(); the LIKE
                    # pattern is passed as a bound parameter.
                    cursor.execute(
                        'SELECT COUNT(*) FROM weather_spider WHERE weather_type LIKE %s;',
                        ('%' + keyword + '%',),
                    )
                    counts.append(cursor.fetchone()[0])
        finally:
            db.close()

        pie = (
            Pie()
            .add("", [list(z) for z in zip(attr, counts)])
            .set_global_opts(title_opts=opts.TitleOpts(title="天气占比表"))
            .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
        )

        pie.render('南昌2020天气占比表.html')
        print('天气图生成成功')
    
    
    if __name__ == '__main__':
        # When run as a script, build the temperature chart first and then
        # the weather-share chart.
        for build_chart in (create_temp, create_weather):
            build_chart()
    

      MySQL操作(安装MySQL不详述)

      创建库

    create database weather;
    

      导入表

    mysql -uroot -p123456 -h127.0.0.1 weather < weather.sql
    

      表语句sql如下weather.sql

    -- Recreate the crawl table from scratch; an existing weather_spider table
    -- (and its rows) is dropped first.
    DROP TABLE IF EXISTS `weather_spider`;
    -- One row per scraped day, filled by the INSERT in main.py.
    -- All values are stored as text exactly as scraped.
    CREATE TABLE `weather_spider` (
      -- title attribute of the day's link; myVisualize.py uses its first 11 characters as the x-axis label
      `time_local` varchar(255) DEFAULT NULL,
      -- href attribute of the day's link
      `link` varchar(255) DEFAULT NULL,
      -- weather description text; matched with LIKE in myVisualize.py
      `weather_type` varchar(255) DEFAULT NULL,
      -- temperature text; myVisualize.py splits it on '/' into high and low and strips '℃'
      `temperature` varchar(255) DEFAULT NULL,
      -- wind description text
      `wind_power` varchar(255) DEFAULT NULL
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;
    

      三,运行

      运行主程序

    python main.py
    

      运行正常会往MySQL数据库写入数据,登录数据库搜索查看

     select * from weather_spider;
    

       运行生成html程序

    python myVisualize.py
    气温图生成成功
    天气图生成成功
    

      在当前目录会生成html,打开查看

     

  • 相关阅读:
    22.112.leetcode_path_sum
    21.leetcode111_minimum_depth_of_binary_tree
    20.leetcode110_balanced_binary_tree
    19.leetcode108_convert_sorted_array_to_binary_search_tree
    论文阅读 | RoBERTa: A Robustly Optimized BERT Pretraining Approach
    CheckList:ACL 2020 Best Paper
    激活函数综述
    盘点深度学习中的损失函数
    逻辑回归
    机器学习之参数估计
  • 原文地址:https://www.cnblogs.com/minseo/p/15723258.html
Copyright © 2011-2022 走看看