zoukankan      html  css  js  c++  java
  • 关于如何只用python获取网页天气(数据)的方法

    获取网页数据无非就三步!

    第一步:首先通过python获取到前端html完整代码!(需要用到requests模块)

    第二步:通过获取到的html代码进行过滤,获取到有用天气数据 (需要用到bs4模块下的BeautifulSoup)

    第三步:获取到的天气数据,进行本地化保存

    PS:其它用到的模块:time、random、socket、csv、http.client

    废话不多说,直接上代码!

    首先,导入引用模块

    from bs4 import BeautifulSoup
    import requests, time, random, socket, csv
    import http.client

    第一步:首先通过python获取到前端html完整代码!(需要用到requests模块)

    # Fetch the complete HTML of the requested URL, retrying on transient errors.
    def htmlcontent(url, data=None):
        """Return the decoded HTML text of *url*.

        Retries forever with a randomized backoff on timeouts and
        connection-level failures.  *data* is unused and kept only for
        backward compatibility with existing callers.
        """
        header = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8',
            'Connection': 'keep-alive',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.235'
        }   # request headers (browser-like UA so the site serves the normal page)
        timeout = random.choice(range(80, 180))  # seconds, randomized once per call
        while True:
            try:
                rep = requests.get(url, headers=header, timeout=timeout)
                rep.encoding = 'utf-8'  # site serves UTF-8; override requests' guess
                break
            # BUG FIX: requests wraps low-level socket failures in its own
            # exception hierarchy (requests.exceptions.*), so the original
            # socket.timeout / socket.error handlers never fired and a timeout
            # escaped the retry loop.  Handle the requests exceptions first;
            # the original handlers are kept as a fallback.
            except requests.exceptions.Timeout as e:
                print('timeout:', e)
                time.sleep(random.choice(range(8, 15)))
            except requests.exceptions.ConnectionError as e:
                print('connection error:', e)
                time.sleep(random.choice(range(20, 60)))
            except socket.timeout as e:
                print('3:', e)
                time.sleep(random.choice(range(8, 15)))
            except socket.error as e:
                print('4:', e)
                time.sleep(random.choice(range(20, 60)))
            except http.client.BadStatusLine as e:
                print('5:', e)
                time.sleep(random.choice(range(30, 80)))
            except http.client.IncompleteRead as e:
                print('6:', e)
                time.sleep(random.choice(range(5, 15)))
        return rep.text   # full HTML of the page
    第一步

    第二步:通过获取到的html代码进行过滤,获取到有用天气数据 (需要用到bs4模块下的BeautifulSoup)

    # Extract the 7-day forecast rows from the page HTML.
    def weatherdata(html_text):
        """Parse *html_text* and return a list of
        ``[date, weather, low, high]`` rows, one per forecast day.

        NOTE(review): when the page is fetched at night, the first day's
        <li> has no daytime-high <span>; that field is returned as None
        instead of raising AttributeError (original bug).
        """
        data_al = []
        bs = BeautifulSoup(html_text, "html.parser")   # parse with the stdlib html.parser backend
        # The 7-day forecast lives in <div id="7d"><ul><li>…</li></ul></div>
        li = bs.body.find('div', {'id': '7d'}).find('ul').find_all('li')

        for data in li:
            date = data.find('h1').string              # e.g. "18日(今天)"
            inf = data.find_all('p')
            weather = inf[0].string                    # sky condition
            # BUG FIX: inf[1].find('span') is None for the first day when
            # fetched at night -> guard before reading .string.
            span = inf[1].find('span')
            temperature_highest = span.string if span is not None else None   # daytime high
            temperature_low = inf[1].find('i').string                          # overnight low
            # one row per day: [date, weather, low, high]
            data_al.append([date, weather, temperature_low, temperature_highest])
        return data_al
    第二步

    第三步:获取到的天气数据,进行本地化保存

    # Append the collected rows to a local CSV file.
    def writedata(data, name):
        """Append the rows in *data* to the CSV file *name*.

        BUG FIX: the file is now opened with an explicit UTF-8 encoding.
        Without it, the platform locale encoding is used and, combined with
        errors='ignore', non-ASCII text (Chinese dates, the ℃ sign) was
        silently dropped on non-UTF-8 systems such as Windows.
        """
        with open(name, 'a', encoding='utf-8', errors='ignore', newline='') as f:
            csv.writer(f).writerows(data)
    第三步

    最后,进行调用

    if __name__ == '__main__':
        # Beijing's 7-day forecast page on weather.com.cn
        target_url = 'http://www.weather.com.cn/weather/101010100.shtml'
        page_html = htmlcontent(target_url)   # download the raw page
        forecast = weatherdata(page_html)     # parse out the forecast rows
        # persist the rows locally as a CSV document
        writedata(forecast, 'C:/Users/LoveCounter/Desktop/天气test.csv')

    完整性代码,如下:

    from bs4 import BeautifulSoup
    import requests, time, random, socket, csv
    import http.client
    
    
    # Fetch the complete HTML of the requested URL, retrying on transient errors.
    def htmlcontent(url, data=None):
        """Return the decoded HTML text of *url*.

        Retries forever with a randomized backoff on timeouts and
        connection-level failures.  *data* is unused and kept only for
        backward compatibility with existing callers.
        """
        header = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8',
            'Connection': 'keep-alive',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.235'
        }   # request headers (browser-like UA so the site serves the normal page)
        timeout = random.choice(range(80, 180))  # seconds, randomized once per call
        while True:
            try:
                rep = requests.get(url, headers=header, timeout=timeout)
                rep.encoding = 'utf-8'  # site serves UTF-8; override requests' guess
                break
            # BUG FIX: requests wraps low-level socket failures in its own
            # exception hierarchy (requests.exceptions.*), so the original
            # socket.timeout / socket.error handlers never fired and a timeout
            # escaped the retry loop.  Handle the requests exceptions first;
            # the original handlers are kept as a fallback.
            except requests.exceptions.Timeout as e:
                print('timeout:', e)
                time.sleep(random.choice(range(8, 15)))
            except requests.exceptions.ConnectionError as e:
                print('connection error:', e)
                time.sleep(random.choice(range(20, 60)))
            except socket.timeout as e:
                print('3:', e)
                time.sleep(random.choice(range(8, 15)))
            except socket.error as e:
                print('4:', e)
                time.sleep(random.choice(range(20, 60)))
            except http.client.BadStatusLine as e:
                print('5:', e)
                time.sleep(random.choice(range(30, 80)))
            except http.client.IncompleteRead as e:
                print('6:', e)
                time.sleep(random.choice(range(5, 15)))
        return rep.text   # full HTML of the page
    
    # Extract the 7-day forecast rows from the page HTML.
    def weatherdata(html_text):
        """Parse *html_text* and return a list of
        ``[date, weather, low, high]`` rows, one per forecast day.

        NOTE(review): when the page is fetched at night, the first day's
        <li> has no daytime-high <span>; that field is returned as None
        instead of raising AttributeError (original bug).
        """
        data_al = []
        bs = BeautifulSoup(html_text, "html.parser")   # parse with the stdlib html.parser backend
        # The 7-day forecast lives in <div id="7d"><ul><li>…</li></ul></div>
        li = bs.body.find('div', {'id': '7d'}).find('ul').find_all('li')

        for data in li:
            date = data.find('h1').string              # e.g. "18日(今天)"
            inf = data.find_all('p')
            weather = inf[0].string                    # sky condition
            # BUG FIX: inf[1].find('span') is None for the first day when
            # fetched at night -> guard before reading .string.
            span = inf[1].find('span')
            temperature_highest = span.string if span is not None else None   # daytime high
            temperature_low = inf[1].find('i').string                          # overnight low
            # one row per day: [date, weather, low, high]
            data_al.append([date, weather, temperature_low, temperature_highest])
        return data_al
    
    
    # Append the collected rows to a local CSV file.
    def writedata(data, name):
        """Append the rows in *data* to the CSV file *name*.

        BUG FIX: the file is now opened with an explicit UTF-8 encoding.
        Without it, the platform locale encoding is used and, combined with
        errors='ignore', non-ASCII text (Chinese dates, the ℃ sign) was
        silently dropped on non-UTF-8 systems such as Windows.
        """
        with open(name, 'a', encoding='utf-8', errors='ignore', newline='') as f:
            csv.writer(f).writerows(data)
    
    
    if __name__ == '__main__':
        # Beijing's 7-day forecast page on weather.com.cn
        target_url = 'http://www.weather.com.cn/weather/101010100.shtml'
        page_html = htmlcontent(target_url)   # download the raw page
        forecast = weatherdata(page_html)     # parse out the forecast rows
        # persist the rows locally as a CSV document
        writedata(forecast, 'C:/Users/LoveCounter/Desktop/天气test.csv')
    获取天气完整性代码
  • 相关阅读:
    Codeforces 834D The Bakery
    hdu 1394 Minimum Inversion Number
    Codeforces 837E Vasya's Function
    Codeforces 837D Round Subset
    Codeforces 825E Minimal Labels
    Codeforces 437D The Child and Zoo
    Codeforces 822D My pretty girl Noora
    Codeforces 799D Field expansion
    Codeforces 438D The Child and Sequence
    Codeforces Round #427 (Div. 2) Problem D Palindromic characteristics (Codeforces 835D)
  • 原文地址:https://www.cnblogs.com/ngz311616/p/9525045.html
Copyright © 2011-2022 走看看