  • Weather forecast scraper

    Python 3.5
    macOS
    A small script that scrapes a 15-day weather forecast:

    import requests, csv, random, time, socket
    from bs4 import BeautifulSoup
    import http.client


    def get_content(url, data=None):
        """Fetch the page at `url`, retrying on network errors, and return its HTML text."""
        header = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8',
            'Connection': 'keep-alive',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 '
                          '(KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
        }
        timeout = random.choice(range(80, 180))
        while True:
            try:
                rep = requests.get(url, headers=header, timeout=timeout)
                rep.encoding = 'utf-8'
                break
            except socket.timeout as e:
                print('3:', e)
                time.sleep(random.choice(range(8, 15)))
            except socket.error as e:
                print('4:', e)
                time.sleep(random.choice(range(20, 60)))
            except http.client.BadStatusLine as e:
                print('5:', e)
                time.sleep(random.choice(range(30, 80)))
            except http.client.ImproperConnectionState as e:
                print('6:', e)
                time.sleep(random.choice(range(5, 15)))

        return rep.text


    def get_data(html_text):
        """Parse the 15-day forecast page into a list of rows, one per day."""
        finalFile = []
        bs = BeautifulSoup(html_text, 'html.parser')
        body = bs.body
        data = body.find('div', id="15d")   # container of the 15-day forecast
        ul = data.find('ul')
        li = ul.find_all('li')              # one <li> per day

        # Each <li> holds <span> elements with the date, the weather description,
        # the temperature, and two wind-related fields, in that order.
        for day in li:
            temp = []
            inf = day.find_all('span')

            date = inf[0].string
            temp.append(date)

            weather = inf[1].string
            temp.append(weather)

            temperature = inf[2].text
            temp.append(temperature)

            wind = inf[3].string
            temp.append(wind)

            wind1 = inf[4].string
            temp.append(wind1)

            finalFile.append(temp)

        return finalFile


    def write_data(data, name):
        """Append the collected rows to a CSV file."""
        file_name = name
        with open(file_name, 'a', encoding='utf-8', errors='ignore', newline='') as f:
            f_csv = csv.writer(f)
            f_csv.writerows(data)


    if __name__ == '__main__':
        url = 'http://www.weather.com.cn/weather15d/101270101.shtml'
        html = get_content(url)
        result = get_data(html)
        write_data(result, 'content.csv')
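
    A minimal sketch for spot-checking the output, assuming the script above has already written content.csv to the current directory:

    import csv

    # Read back the rows written by write_data() and print them for inspection.
    with open('content.csv', newline='', encoding='utf-8') as f:
        for row in csv.reader(f):
            # Each row holds the five fields collected in get_data():
            # date, weather, temperature, and the two wind fields.
            print(', '.join(row))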
  • Original article: https://www.cnblogs.com/fredkeke/p/5767216.html