  • Lesson 5: China Weather Network (weather.com.cn) scraper example

    1. China Weather Network scraper example

    # China Weather Network (weather.com.cn) scraper
    
    import requests
    from pyecharts.charts import Bar
    from bs4 import BeautifulSoup
    import copy

    # html5lib is not used directly; BeautifulSoup loads the "html5lib" parser
    # by name below, and this import simply makes sure the package is installed.
    import html5lib
    datas = []  # one record per (city, day)
    data = {    # template record; a copy is appended to datas for every city row
        "city": None,
        "day": None,
        "higher_temp": None,
        "lower_temp": None
    }
    
    HEADERS = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
        "Referer": "http://www.weather.com.cn/textFC/db.shtml"
    }
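    # Page layout assumed by the parser below:
    #   - each div.conMidtab block holds one day's forecast tables;
    #   - each div.conMidtab2 block inside it holds one province's table;
    #   - in every table, rows 0-1 are headers (the date sits inside the
    #     parentheses of the third cell of row 0), row 2 is the first city of
    #     the province (its first cell is the province name), and later rows
    #     start directly with the city name.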
    def weather_spider_dome(url):
        html = requests.get(url=url,headers=HEADERS).content.decode("utf-8")
        soup = BeautifulSoup(html,"html5lib")
        conMidtabs = soup.find_all(attrs={"class":"conMidtab"})
        if url.find("gat") != -1:  # the Hong Kong/Macau/Taiwan page ("gat") is walked table by table
            for conMidtab in conMidtabs:
                tables = conMidtab.find(attrs={"class":"conMidtab2"}).find_all("table")
                for table in tables:
                    trs = table.find_all("tr")
                    for i,tr in enumerate(trs):
                        global cur_day
                        tds = trs[i].find_all("td")
                        if i == 0:  # header row: pull the date out of the parentheses in the third cell
                            start = tds[2].string.find("(")
                            end = tds[2].string.find(")")
                            cur_day = tds[2].string[start+1:end]
                            data["day"] = cur_day
                        elif i==2:  # first city row: tds[0] is the province cell, so the city is tds[1]
                            data["city"] = list(tds[1].stripped_strings)[0]
                            higher_temp = tds[4].string
                            lower_temp = tds[7].string
                            data["higher_temp"] = higher_temp
                            data["lower_temp"] = lower_temp
                            datas.append(copy.copy(data))
                        elif i>=3:  # remaining city rows start directly with the city name
                            data["city"] = list(tds[0].stripped_strings)[0]
                            higher_temp = tds[3].string
                            lower_temp = tds[6].string
                            data["higher_temp"] = higher_temp
                            data["lower_temp"] = lower_temp
                            datas.append(copy.copy(data))
        else:
            for conMidtab in conMidtabs:
                conMidtab2s = conMidtab.find_all(attrs={"class":"conMidtab2"})
                for conMidtab2 in conMidtab2s:
                    trs = conMidtab2.find_all("tr")
                    for i,tr in enumerate(trs):
                        tds = trs[i].find_all("td")
                        if i == 0:
                            start = tds[2].string.find("(")
                            end = tds[2].string.find(")")
                            cur_day = tds[2].string[start+1:end]
                            data["day"] = cur_day
                        elif i>1:
                            if i == 2:
                                # first city row of a province: tds[0] is the
                                # province cell, so the city name is in tds[1]
                                data["city"] = list(tds[1].stripped_strings)[0]
                                higher_temp = tds[4].string
                                lower_temp = tds[7].string
                            else:
                                data["city"] = list(tds[0].stripped_strings)[0]
                                higher_temp = tds[3].string
                                lower_temp = tds[6].string
                            data["higher_temp"] = higher_temp
                            data["lower_temp"] = lower_temp
                            print(data)
                            datas.append(copy.copy(data))
    
    
    
    
    if __name__=="__main__":
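       # regional forecast pages: hb=North China, db=Northeast, hd=East China,
       # hz=Central China, hn=South China, xb=Northwest, xn=Southwest,
       # gat=Hong Kong/Macau/Taiwan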
       urls = ["http://www.weather.com.cn/textFC/hb.shtml",
               "http://www.weather.com.cn/textFC/db.shtml",
               "http://www.weather.com.cn/textFC/hd.shtml",
               "http://www.weather.com.cn/textFC/hz.shtml",
               "http://www.weather.com.cn/textFC/hn.shtml",
               "http://www.weather.com.cn/textFC/xb.shtml",
               "http://www.weather.com.cn/textFC/xn.shtml",
               "http://www.weather.com.cn/textFC/gat.shtml"]
    
       for url in urls:
           weather_spider_dome(url)
       for i in datas:
           print(i)
       # cities = []
       # temp = []
       # for i in datas:
       #     if i["day"] == "12月11日":
       #         cities.append(i["city"])
       #         cities.append(i["city"])
       #         temp.append(i["higher_temp"])
       #         temp.append(i["lower_temp"])
       # print(cities)
       # print(temp)
       # bar = Bar()
       #
       #
       # bar.add_xaxis(cities)
       # bar.add_yaxis("12月11日", temp)
       # bar.render("weather.html")
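
    The commented-out lines above hint at charting the results. Below is a
    minimal sketch of how that could be completed, assuming pyecharts 1.x (the
    add_xaxis/add_yaxis API used above) and that it runs after `datas` has been
    filled; the date string "12月11日" and the helper `to_int` are only
    illustrative, so substitute a day value that actually appears in `datas`.

    from pyecharts.charts import Bar

    def to_int(value):
        # the site sometimes shows "-" or an empty cell instead of a number;
        # treat anything non-numeric as missing
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    day = "12月11日"   # example value; pick one printed from datas
    cities, high, low = [], [], []
    for record in datas:
        high_v, low_v = to_int(record["higher_temp"]), to_int(record["lower_temp"])
        if record["day"] == day and high_v is not None and low_v is not None:
            cities.append(record["city"])
            high.append(high_v)
            low.append(low_v)

    bar = Bar()
    bar.add_xaxis(cities)
    bar.add_yaxis("high (°C)", high)
    bar.add_yaxis("low (°C)", low)
    bar.render("weather.html")   # writes an interactive chart to weather.html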
    

      

  • Original post: https://www.cnblogs.com/win0211/p/12024984.html