  • Weather forecast scraper

    Python 3.5
    macOS
    A small script that scrapes a 15-day weather forecast:

    import requests, csv, random, time, socket
    from bs4 import BeautifulSoup
    import http.client


    def get_content(url, data=None):
        """Fetch the page at `url`, retrying on network errors, and return its HTML text."""
        header = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8',
            'Connection': 'keep-alive',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 '
                          '(KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
        }
        timeout = random.choice(range(80, 180))
        while True:
            try:
                rep = requests.get(url, headers=header, timeout=timeout)
                rep.encoding = 'utf-8'
                break
            except socket.timeout as e:
                print('3:', e)
                time.sleep(random.choice(range(8, 15)))
            except socket.error as e:
                print('4:', e)
                time.sleep(random.choice(range(20, 60)))
            except http.client.BadStatusLine as e:
                print('5:', e)
                time.sleep(random.choice(range(30, 80)))
            except http.client.ImproperConnectionState as e:
                print('6:', e)
                time.sleep(random.choice(range(5, 15)))

        return rep.text


    def get_data(html_text):
        """Parse the 15-day forecast page into a list of rows, one per day."""
        finalFile = []
        bs = BeautifulSoup(html_text, 'html.parser')
        body = bs.body
        data = body.find('div', id="15d")   # container of the 15-day forecast
        ul = data.find('ul')
        li = ul.find_all('li')              # one <li> per day

        # Each <li> holds <span> elements with the date, the weather description,
        # the temperature, and two wind-related fields, in that order.
        for day in li:
            temp = []
            inf = day.find_all('span')

            date = inf[0].string
            temp.append(date)

            weather = inf[1].string
            temp.append(weather)

            temperature = inf[2].text
            temp.append(temperature)

            wind = inf[3].string
            temp.append(wind)

            wind1 = inf[4].string
            temp.append(wind1)

            finalFile.append(temp)

        return finalFile


    def write_data(data, name):
        """Append the collected rows to a CSV file."""
        file_name = name
        with open(file_name, 'a', encoding='utf-8', errors='ignore', newline='') as f:
            f_csv = csv.writer(f)
            f_csv.writerows(data)


    if __name__ == '__main__':
        url = 'http://www.weather.com.cn/weather15d/101270101.shtml'
        html = get_content(url)
        result = get_data(html)
        write_data(result, 'content.csv')
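
    A minimal sketch for spot-checking the output, assuming the script above has already written content.csv to the current directory:

    import csv

    # Read back the rows written by write_data() and print them for inspection.
    with open('content.csv', newline='', encoding='utf-8') as f:
        for row in csv.reader(f):
            # Each row holds the five fields collected in get_data():
            # date, weather, temperature, and the two wind fields.
            print(', '.join(row))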
  • Original article: https://www.cnblogs.com/fredkeke/p/5767216.html