注释:
1、本例子仅为测试代码有效性,故只选取了四个城市进行数据获取;
2、本例尚有可优化之处,例如代码的简洁性、循环输入城市名字等;
# Scrape the 7-day forecast from weather.com.cn, append it to a CSV file
# and print it to the console.
import csv
import http.client  # exception types used in get_html
import random
import socket  # exception types used in get_html
import time

import requests  # downloads the page's HTML source
from bs4 import BeautifulSoup  # pulls tag contents out of the HTML


def get_html(url, data=None):
    """Download *url* with browser-like headers and return its text.

    Failed attempts are reported on stdout and retried after a random
    pause; there is no retry limit, so the call only returns once a
    request has succeeded.

    Args:
        url: Address of the page to download.
        data: Unused; kept so existing callers keep working.

    Returns:
        The response body decoded as UTF-8.
    """
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.235'
    }
    # Randomised timeout (seconds); the original author varies it so the
    # site is less likely to treat the client as a crawler.
    timeout = random.choice(range(80, 180))
    while True:
        try:
            rep = requests.get(url, headers=header, timeout=timeout)
            rep.encoding = "utf-8"
            break
        except socket.timeout as e:
            print("3:", e)
            time.sleep(random.choice(range(8, 15)))
        except socket.error as e:
            print("4:", e)
            time.sleep(random.choice(range(20, 60)))
        except http.client.BadStatusLine as e:
            print("5:", e)
            time.sleep(random.choice(range(30, 80)))
        except http.client.IncompleteRead as e:
            print("6:", e)
            time.sleep(random.choice(range(5, 15)))

    return rep.text


def get_data(html_txt):
    """Extract one row per forecast day from the page HTML.

    Args:
        html_txt: HTML source of a forecast page.

    Returns:
        A list of ``[date, first_p_text, high, low]`` lists, one per
        ``<li>`` of the ``div#7d`` list. ``high`` is ``None`` when the
        entry has no ``<span>`` (per the original author: the evening
        entry carries no daily high). The "℃" sign is stripped from both
        temperatures.

    Raises:
        ValueError: If the page has no ``<ul>`` inside ``<div id="7d">``.
    """
    final = []
    bs = BeautifulSoup(html_txt, "html.parser")
    body = bs.body
    container = body.find("div", {"id": "7d"}) if body is not None else None
    forecast_list = container.find("ul") if container is not None else None
    if forecast_list is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError('forecast list (div#7d > ul) not found in the page')

    for day in forecast_list.find_all("li"):
        date = day.find("h1").string
        inf = day.find_all("p")
        # First <p>: presumably the weather description -- TODO confirm.
        summary = inf[0].string

        # Second <p> holds the temperatures: <span> = high, <i> = low.
        high_tag = inf[1].find("span")
        if high_tag is None:
            temperature_high = None
        else:
            temperature_high = high_tag.string.replace("℃", "")
        temperature_lower = inf[1].find("i").string.replace("℃", "")

        final.append([date, summary, temperature_high, temperature_lower])

    return final


def write_data(data, name):
    """Append the rows in *data* to the CSV file *name*.

    Args:
        data: Iterable of rows (each an iterable of cell values).
        name: Path of the CSV file; created if it does not exist.
    """
    # An explicit encoding keeps the Chinese text intact on every platform;
    # with the locale default plus errors='ignore', characters the locale
    # could not encode were silently dropped. utf-8-sig writes a BOM only
    # at the start of a new/empty file.
    with open(name, 'a', encoding='utf-8-sig', errors='ignore', newline='') as f:
        csv.writer(f).writerows(data)


def get_url():
    """Ask the user for a city and return its forecast page URL.

    The known city names are printed first; the prompt repeats until one
    of them is entered (surrounding whitespace is ignored).

    Returns:
        The weather.com.cn forecast URL for the chosen city.
    """
    city = {
        "海口": "101310101",
        "三亚": "101310201",
        "苏州": "101190401",
        "郑州": "101180101"
    }
    for k in city:
        print(k)

    while True:
        city_name = input("请输入你要查询的城市名字:").strip()
        if city_name in city:
            break
        # Re-prompt instead of crashing with a KeyError on unknown names.
        print("未找到该城市,请重新输入。")

    city_num = city[city_name]
    weather_url = "http://www.weather.com.cn/weather/%s.shtml" % city_num
    return weather_url


if __name__ == "__main__":
    # A fixed URL can be used instead of prompting, e.g. for 苏州:
    # "http://www.weather.com.cn/weather/101190401.shtml"
    url = get_url()
    html = get_html(url)
    result = get_data(html)
    write_data(result, "weather.csv")
    for i in result:
        print(i)  # show each day's forecast row