zoukankan      html  css  js  c++  java
  • python爬虫练习

    # Baidu hot searches
    # -*- coding: utf-8 -*-
    #
    # Fetch the Baidu "buzz" ranking page and print a list of
    # [title, number] pairs: one pair for every table row (after the first)
    # that contains both a "list-title" element and a "last" element.
    import requests
    from bs4 import BeautifulSoup

    url = "http://top.baidu.com/buzz?b=1&fr=topindex"
    header = {
        "user-agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/83.0.4103.106 Safari/537.36"
        ),
    }
    content = []

    # A timeout keeps the script from blocking forever on a stalled server,
    # and raise_for_status() stops an HTTP error page from being parsed as
    # if it were the ranking table.
    r = requests.get(url, headers=header, timeout=10)
    r.raise_for_status()

    # Decode the body as GBK up front.  The previous approach re-encoded each
    # title as ISO-8859-1 and decoded it as GBK, which only works when
    # requests happened to fall back to ISO-8859-1 for this response.
    r.encoding = "gbk"
    soup = BeautifulSoup(r.text, "html.parser")

    # The first <tr> is skipped (presumably the table header row -- confirm
    # against the live page).
    for row in soup.find_all("tr")[1:]:
        title_tag = row.find(class_="list-title")
        number_tag = row.find(class_="last")
        # Rows lacking either cell are not ranking entries; skip them instead
        # of failing on a None lookup.
        if title_tag is None or number_tag is None:
            continue
        content.append([title_tag.text, number_tag.text.strip()])

    print(content)
    # Weather forecast
    # -*- coding: utf-8 -*-
    #
    # Fetch the weather.com.cn forecast page for station 101010100 and print
    # a list of [title, weather, temperature, wind] entries, one per
    # <ul class="t clearfix"> list found on the page.
    import requests
    from bs4 import BeautifulSoup

    url = "http://www.weather.com.cn/weather/101010100.shtml"
    header = {
        "user-agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/83.0.4103.106 Safari/537.36"
        ),
    }
    content = []

    # A timeout keeps the script from blocking forever on a stalled server,
    # and raise_for_status() stops an HTTP error page from being parsed as
    # if it were the forecast.
    r = requests.get(url, headers=header, timeout=10)
    r.raise_for_status()
    r.encoding = "utf-8"
    soup = BeautifulSoup(r.text, "html.parser")

    for forecast_list in soup.select("ul[class='t clearfix']"):
        # Only the first match of each selector inside the list is used
        # (presumably today's entry, going by the original variable name
        # "TodayWeather" -- confirm against the live page).
        title_tag = forecast_list.select_one("h1")
        wea_tag = forecast_list.select_one(".wea")
        tem_tag = forecast_list.select_one(".tem")
        win_tag = forecast_list.select_one(".win")
        # Skip lists that lack any of the four fields instead of failing
        # with a bare IndexError.
        if None in (title_tag, wea_tag, tem_tag, win_tag):
            continue
        content.append([
            title_tag.text,
            wea_tag.text,
            tem_tag.text.strip(),
            win_tag.text.strip(),
        ])

    print(content)
  • 相关阅读:
    【转】Centos yum 换源
    centos7下使用yum安装mysql
    【转】简易smtp调用类
    【转】Beanstalkd 队列简易使用
    【转】mysql 拖库写库用法
    【转】scp 命令格式
    【转】mac os 安装php
    pip 国内源 gem 国内源
    【转】25个必须记住的SSH命令
    NHibernate 有好几种数据库查询方式
  • 原文地址:https://www.cnblogs.com/python-kp/p/13254943.html
Copyright © 2011-2022 走看看