zoukankan html css js c++ java

Python 爬虫练习

#百度热搜
# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup

# Fetch the Baidu hot-search ranking page and print a list of
# [title, search-index text] pairs collected from its table rows.
url = "http://top.baidu.com/buzz?b=1&fr=topindex"
header = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36",
}
content = []
# A timeout keeps the script from blocking forever on an unresponsive server.
r = requests.get(url, headers=header, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page into an empty list.
r.raise_for_status()
# The titles are GBK text. Declaring the encoding before reading r.text decodes
# the whole page once, instead of re-encoding every title through ISO-8859-1
# (which breaks as soon as requests picks any other encoding for the response).
r.encoding = "gbk"
respond = r.text
soup = BeautifulSoup(respond, "html.parser")
# The first <tr> is skipped; only the following rows are inspected.
HotSearchs = soup.find_all("tr")[1:]
for HotSearch in HotSearchs:
    title_tag = HotSearch.find(class_="list-title")
    number_tag = HotSearch.find(class_="last")
    # Rows lacking either cell are not ranking entries; skip them rather than
    # crash with AttributeError on None.
    if title_tag is None or number_tag is None:
        continue
    title = title_tag.text
    number = number_tag.text.strip()
    content.append([title, number])
print(content)

#天气预报
# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup

# Fetch the weather.com.cn forecast page for station 101010100 and print, for
# each matched forecast list, [heading, weather text, temperature, wind] taken
# from the first matching element of each kind.
url = "http://www.weather.com.cn/weather/101010100.shtml"
header = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36",
}
content = []
# A timeout keeps the script from blocking forever on an unresponsive server.
r = requests.get(url, headers=header, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page into an empty list.
r.raise_for_status()
r.encoding = "utf-8"
respond = r.text
soup = BeautifulSoup(respond, "html.parser")
TodayWeather = soup.select("ul[class='t clearfix']")
for weather in TodayWeather:
    # select_one returns the first match or None, so a missing field can be
    # detected up front instead of raising IndexError on an empty result list.
    fields = [weather.select_one(selector) for selector in ("h1", ".wea", ".tem", ".win")]
    if any(field is None for field in fields):
        continue
    title_tag, wea_tag, tem_tag, win_tag = fields
    title = title_tag.text
    wea = wea_tag.text
    tem = tem_tag.text.strip()
    win = win_tag.text.strip()
    content.append([title, wea, tem, win])
print(content)

查看全文

相关阅读:
Atitit.atiJsBridge 新特性v7q329
atitit.userService 用户系统设计 v6 q413
atitit.userService 用户系统设计 v6 q413
Atitit.获取某个服务网络邻居列表解决方案
 Hasse神舟笔记本卡logo解决，刷BIOS方法，教你修复神船
 Axure RP 8 注册码
 在本地硬盘安装WinPE系统，实现UEFI引导，摆脱U盘
 cmd实现批量文件的base64加密并双击加密文件后正常运行
 cmd的变量总结
 fiddler几种功能强大的用法

原文地址：https://www.cnblogs.com/python-kp/p/13254943.html