zoukankan      html  css  js  c++  java
  • python爬虫练习

    # Baidu hot-search ranking
    # -*- coding:utf-8 -*-
    # Fetches the Baidu "top buzz" page and prints a list of
    # [title, search-count] pairs, one per ranking row.
    import requests
    from bs4 import BeautifulSoup

    url = "http://top.baidu.com/buzz?b=1&fr=topindex"
    header = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36",
    }

    # Without a timeout requests waits indefinitely on an unresponsive server.
    r = requests.get(url, headers=header, timeout=10)
    # Fail loudly on HTTP errors instead of parsing an error page.
    r.raise_for_status()
    # The page text is GBK. Declare that once before reading r.text rather
    # than re-encoding every title through ISO-8859-1 afterwards, which only
    # works when requests happened to fall back to ISO-8859-1.
    r.encoding = "gbk"
    respond = r.text
    soup = BeautifulSoup(respond, "html.parser")

    content = []
    # The first <tr> is skipped, as in the original script (presumably the
    # table header row).
    for row in soup.find_all("tr")[1:]:
        title_tag = row.find(class_="list-title")
        number_tag = row.find(class_="last")
        # Rows without both a title and a count cell are not ranking entries.
        if title_tag is None or number_tag is None:
            continue
        content.append([title_tag.text, number_tag.text.strip()])
    print(content)
    # Weather forecast
    # -*- coding:utf-8 -*-
    # Fetches the weather.com.cn forecast page for station 101010100 and
    # prints a list of [title, weather, temperature, wind] entries.
    import requests
    from bs4 import BeautifulSoup

    url = "http://www.weather.com.cn/weather/101010100.shtml"
    header = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36",
    }

    # Without a timeout requests waits indefinitely on an unresponsive server.
    r = requests.get(url, headers=header, timeout=10)
    # Fail loudly on HTTP errors instead of parsing an error page.
    r.raise_for_status()
    # Decode the body as UTF-8 explicitly instead of trusting the guess
    # requests makes from the response headers.
    r.encoding = "utf-8"
    respond = r.text
    soup = BeautifulSoup(respond, "html.parser")

    content = []
    for forecast in soup.select("ul[class='t clearfix']"):
        # Only the first match of each selector inside the list is used,
        # exactly as the original `select(...)[0]` lookups did.
        fields = [
            forecast.select_one(selector)
            for selector in ("h1", ".wea", ".tem", ".win")
        ]
        # Skip a list that lacks any of the four elements instead of
        # crashing with IndexError on a changed page layout.
        if any(tag is None for tag in fields):
            continue
        title, wea, tem, win = fields
        content.append([title.text, wea.text, tem.text.strip(), win.text.strip()])
    print(content)
  • 相关阅读:
    第八章 路由器交换机及其操作系统的介绍
    k-Tree DP计数
    Drop Voicing 最长升序
    高精度
    1196D2
    C
    POJ 3974 马拉车
    2020.8.1第二十六天
    2020.7.31第二十五天
    每日日报
  • 原文地址:https://www.cnblogs.com/python-kp/p/13254943.html
Copyright © 2011-2022 走看看