zoukankan      html  css  js  c++  java
  • selenium爬虫 | 爬取疫情实时动态

    import csv
    import selenium.webdriver
    from selenium.webdriver.chrome.options import Options

    class spider():
        """Scrape epidemic figures from a web page with headless Chrome and save them as CSV.

        The XPath / CSS selectors below are tied to the markup of the page
        requested in ``main``; they will stop matching if that markup changes.
        """

        @staticmethod
        def _first_text(element, selectors):
            """Return the text of the first CSS selector in ``selectors`` found under ``element``.

            Every selector except the last is treated as optional; a miss on the
            last selector propagates ``NoSuchElementException`` to the caller.
            """
            # Function-scope imports keep the file-level import block untouched.
            from selenium.common.exceptions import NoSuchElementException
            from selenium.webdriver.common.by import By

            *optional, last = selectors
            for selector in optional:
                try:
                    return element.find_element(By.CSS_SELECTOR, selector).text
                except NoSuchElementException:
                    # Only "not found" triggers the fallback; other errors surface.
                    continue
            return element.find_element(By.CSS_SELECTOR, last).text

        def get_msg(self, url):
            """Load ``url`` in headless Chrome, print the figures and collect them.

            Args:
                url: Address of the statistics page to scrape.

            Returns:
                A ``(time_text, rows)`` tuple. ``time_text`` is the timestamp text
                shown on the page. ``rows`` maps a region name to its row:
                ``[name, confirmed, cured, dead, suspected]`` for the national
                entry and ``[name, confirmed, cured, dead]`` for every region.

            Side effects:
                Also stores the two results in the module globals ``timeNum`` and
                ``provinceDic`` so existing code reading those names keeps working.

            Raises:
                selenium.common.exceptions.NoSuchElementException: if an expected
                    element is missing from the page.
            """
            global timeNum, provinceDic
            from selenium.webdriver.common.by import By

            # Run Chrome without opening a window.
            options = Options()
            options.add_argument('--headless')
            options.add_argument('--disable-gpu')
            driver = selenium.webdriver.Chrome(options=options)
            try:
                driver.get(url)

                def xpath_text(xpath):
                    return driver.find_element(By.XPATH, xpath).text

                time_text = xpath_text('//*[@id="charts"]/div[2]/span[1]')  # timestamp
                icbar_confirm = xpath_text('//*[@id="charts"]/div[3]/div[1]/div[1]')  # confirmed
                icbar_suspect = xpath_text('//*[@id="charts"]/div[3]/div[2]/div[1]')  # suspected
                icbar_cure = xpath_text('//*[@id="charts"]/div[3]/div[3]/div[1]')  # cured
                icbar_dead = xpath_text('//*[@id="charts"]/div[3]/div[4]/div[1]')  # dead
                # Argument order must follow the label order in the format string.
                print("{} 全国确诊:{} 疑似病例:{} 治愈人数:{} 死亡人数:{} ".format(
                    time_text, icbar_confirm, icbar_suspect, icbar_cure, icbar_dead))

                # Region rows carry slightly different class attributes; gather
                # them all, keeping the original concatenation order.
                region_selectors = (
                    'div[class="place current"]',
                    'div[class="place"]',
                    'div[class="place "]',
                    "div[class='place no-sharp ']",
                )
                region_elements = []
                for selector in region_selectors:
                    region_elements.extend(
                        driver.find_elements(By.CSS_SELECTOR, selector))

                # chr(12288) is the full-width space, used as the padding character.
                tplt = "{0:{4}<10} {1:{4}<15} {2:{4}<15} {3:{4}<15}"
                print(tplt.format("地区", "确诊人数", "治愈人数", "死亡人数", chr(12288)) + " ")

                # Region name -> row of figures; the national row also has the
                # suspected count as a fifth column.
                rows = {
                    "全国": ["全国", icbar_confirm, icbar_cure, icbar_dead, icbar_suspect],
                }
                for region in region_elements:
                    name = region.find_element(
                        By.CSS_SELECTOR, "span[class='infoName']").text
                    confirm = region.find_element(
                        By.CSS_SELECTOR, "span[class='confirm'] span").text
                    # The cured / dead cells appear under one of two class values.
                    heal = self._first_text(
                        region,
                        ("span[class='heal '] span", "span[class='heal hide'] span"))
                    dead = self._first_text(
                        region,
                        ("span[class='dead '] span", "span[class='dead hide'] span"))
                    print(tplt.format(name, confirm, heal, dead, chr(12288)))
                    rows[name] = [name, confirm, heal, dead]
            finally:
                # Always shut the browser down, even when scraping fails.
                driver.quit()

            timeNum, provinceDic = time_text, rows
            return time_text, rows

        def save_data_as_csv(self, filename, dataList):
            """Write ``dataList`` to ``<filename>.csv`` below a fixed header row.

            Args:
                filename: Output name without extension. Every ``:`` is replaced
                    by a space before the file is opened.
                dataList: Iterable of rows, each an iterable of cell values.
            """
            filename = filename.replace(":", " ")
            # utf-8-sig makes the output independent of the locale encoding
            # (which may be unable to encode the Chinese text); its BOM lets
            # spreadsheet tools detect the encoding.
            with open(filename + ".csv", "w", newline="", encoding="utf-8-sig") as f:
                writer = csv.writer(f)
                writer.writerow(["地区", "确诊人数", "治愈人数", "死亡人数", "疑似病例"])
                writer.writerows(dataList)

        def main(self):
            """Scrape the Tencent News epidemic page and save the rows as CSV.

            The CSV is named after the timestamp text read from the page.
            """
            url = "https://news.qq.com/zt2020/page/feiyan.htm"
            time_text, rows = self.get_msg(url)
            self.save_data_as_csv(time_text, rows.values())


    # Script entry: build the scraper and run one scrape-and-save pass.
    billie = spider()
    run_once = billie.main
    run_once()

    
    
  • 相关阅读:
    华为云发送邮件
    activiti act_re_model 分析
    tengine upstream
    zuul压力测试与调优
    idea 快捷键
    kubernetes helm
    编写高质量代码–改善python程序的建议(二)
    编写高质量代码--改善python程序的建议(一)
    总结OpenvSwitch的调试经验
    提高SDN控制器拓扑发现性能
  • 原文地址:https://www.cnblogs.com/billie52707/p/12238787.html
Copyright © 2011-2022 走看看