zoukankan      html  css  js  c++  java
  • Python 基础实战 -- 爬虫(天气查询系统)

      1 #需求:查询地区天气
      2 #分析:第一步,抓取上面所有的天气信息
      3 
      4 from html.parser import HTMLParser
      5 from urllib import request
      6 import pickle
      7 import json
      8 
      9 #解析中国天气网HTML
     class WeatherHtmlParser(HTMLParser):
         """Pull the ``hour3data`` forecast object out of a weather page.

         The parser looks for script text containing ``var hour3data=`` and
         decodes the JSON value that follows it.  After ``feed()`` the decoded
         value is available as ``weather_data``; it stays ``None`` when no such
         assignment was seen inside a ``<script>`` element.
         """

         # Text that introduces the JSON payload inside the script body.
         _MARKER = "var hour3data="

         def __init__(self):
             super().__init__()
             self.flag = False          # True while inside a <script> element
             self.weather_data = None   # decoded forecast, None if not found

         def handle_starttag(self, tag, attr):
             """Remember that script text is about to follow."""
             if tag == "script":
                 self.flag = True

         def handle_endtag(self, tag):
             """Remember that the current script element has ended."""
             if tag == "script":
                 self.flag = False

         def handle_data(self, data):
             """Decode the JSON value assigned to ``hour3data``, if present.

             Raises:
                 json.JSONDecodeError: if the text after the marker does not
                     start with a valid JSON value.
             """
             if not self.flag:
                 return
             _, found, payload = data.partition(self._MARKER)
             if not found:
                 return
             # str.strip("var hour3data=") removes a *set of characters* from
             # both ends, not the prefix, so the text after the marker is taken
             # instead.  raw_decode() parses exactly one JSON value and ignores
             # whatever follows it (a trailing ";", newlines, more statements).
             self.weather_data, _ = json.JSONDecoder().raw_decode(payload.lstrip())
     30                 
     31 
     32 #全国城市天气预报代码
     class CityCodeHtmlParser(HTMLParser):
         """Collect ``code=name`` pairs from a city-code listing page.

         Text that follows a ``<p>`` or ``<br>`` tag and contains ``=`` is split
         into a code (left of the first ``=``) and a city name (right of it).
         The pairs are exposed as ``city_dict``, mapping city name -> code.
         """

         def __init__(self):
             super().__init__()
             self.flag = False     # True while text may hold "code=name" entries
             self.city_dict = {}   # city name -> city code

         def handle_starttag(self, tag, attr):
             """Start collecting after a paragraph start or a line break."""
             if tag == "p" or tag == "br":
                 self.flag = True

         def handle_endtag(self, tag):
             """Stop collecting at the end of a paragraph."""
             # Only </p> ends a listing.  A self-closing <br/> is reported as a
             # start tag immediately followed by an end tag; resetting the flag
             # for it would discard the entry that follows the line break.
             if tag == "p":
                 self.flag = False

         def handle_data(self, data):
             """Record one ``code=name`` entry found in *data*."""
             if not self.flag or "=" not in data:
                 return
             parts = data.split("=")
             # Surrounding whitespace/newlines would otherwise become part of
             # the key and make later lookups by city name fail.
             code = parts[0].strip()
             name = parts[1].strip()
             if code and name:
                 self.city_dict[name] = code
     53             
     54                 
     def _print_forecast_items(items):
         """Print one line per comma-separated forecast record in *items*."""
         for item in items:
             fields = item.split(",")
             print("%s::天气:%s; 温度:%s; 风向:%s; 风力:%s" % (fields[0], fields[2], fields[3], fields[4], fields[5]))


     def printWeatherInfo(func):
         """Decorate a weather fetcher so that its result is printed.

         The wrapped callable is invoked with whatever arguments the wrapper
         receives.  If it returns ``None`` nothing is printed.  Otherwise the
         records under ``info["1d"]`` are printed, and the user is asked whether
         the following days (``info["7d"]``) should be printed as well.

         Args:
             func: callable returning ``None`` or a mapping with the keys
                 ``"1d"`` (list of records) and ``"7d"`` (list of lists of
                 records).

         Returns:
             The wrapper; calling it always returns ``None``.
         """
         import functools  # local import keeps this block self-contained

         @functools.wraps(func)
         def call(*args, **kwargs):
             info = func(*args, **kwargs)
             if info is None:
                 return None

             # Forecast for the current day.
             _print_forecast_items(info["1d"])

             # Only an empty answer (just pressing Enter) opts in to the
             # multi-day forecast; any other input ends here.
             flag = input("是否打印未来7天内的天气:")
             if flag != "":
                 return None

             # Entry 0 is skipped, as before.  Slicing instead of indexing
             # 0..6 avoids an IndexError when fewer than seven days are present.
             for day in info["7d"][1:7]:
                 _print_forecast_items(day)
             return None

         return call
     80 
     81 
     82 
     83 #抓取天气信息
     @printWeatherInfo
     def getAllWeather():
         """Ask for a city name and fetch its forecast data.

         The city name is read from standard input and translated to a city
         code with ``queryCityCode``.  The one-day forecast page for that code
         is downloaded and parsed with ``WeatherHtmlParser``.

         Returns:
             The parser's ``weather_data`` (``None`` if the page held no
             forecast), or ``None`` when the city name is unknown.  Because of
             the ``printWeatherInfo`` decorator, callers of the decorated name
             get that value printed rather than returned.

         Raises:
             urllib.error.URLError: if the page cannot be fetched
                 (including a timeout while connecting).
         """
         city_code = queryCityCode(input("请输入你要查询的城市:"))
         if city_code is None:
             return None

         url_address = "http://www.weather.com.cn/weather1d/%s.shtml" % city_code
         req = request.Request(url_address)
         req.add_header("User-Agent","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36")
         # Without a timeout urlopen() can block forever on a stalled connection.
         with request.urlopen(req, timeout=15) as html:
             data = html.read().decode("utf-8")

         # The response is fully read above, so parsing does not need the
         # connection to stay open.
         html_parser = WeatherHtmlParser()
         html_parser.feed(data)
         html_parser.close()
         return html_parser.weather_data
     99         
    100 
    101         
    102 #查询城市的编码
    def queryCityCode(city_name):
        """Look up the city code for *city_name*.

        The complete name -> code table is downloaded and parsed with
        ``CityCodeHtmlParser`` on every call.

        Args:
            city_name: city name exactly as it appears in the listing.

        Returns:
            The city code string, or ``None`` if the name is not listed.

        Raises:
            urllib.error.URLError: if the listing cannot be fetched
                (including a timeout while connecting).
        """

        # The table is fetched from the web because the original author could
        # not attach a data file to the blog post; the author notes the code is
        # rough and kept mainly as a record.
        def getAllCityInfo():
            url_address = "http://doc.orz520.com/a/doc/2014/0322/2100581.html"
            req = request.Request(url_address)
            req.add_header("User-Agent","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36")
            # Without a timeout urlopen() can block forever on a stalled
            # connection.
            with request.urlopen(req, timeout=15) as html:
                data = html.read().decode("utf-8")
            html_parser = CityCodeHtmlParser()
            html_parser.feed(data)
            html_parser.close()
            return html_parser.city_dict

        # dict.get() yields None for an unknown city, as the explicit
        # membership test did.
        return getAllCityInfo().get(city_name)
    122         
    123         
    # Run one interactive weather query as soon as the script is executed.
    getAllWeather()
    # Wait for one more line of input before exiting; presumably this keeps a
    # console window open after the output -- confirm.
    aa = input()
  • 相关阅读:
    一段路
    memcache 键名的命名规则以及和memcached的区别
    浏览器解释网页时乱码
    windows下安装Apache
    巧用PHP数组函数
    程序返回值的数据结构
    Linux如何生成列表
    判断用户密码是否在警告期内(学习练习)
    判断用户的用户名和其基本组的组名是否一致
    sed笔记
  • 原文地址:https://www.cnblogs.com/jiangchenxi/p/8082571.html
Copyright © 2011-2022 走看看