如何获取一个页面的所有的a链接
####
使用 Beautiful Soup 的方式
import requests
from bs4 import BeautifulSoup
from ezpymysql import Connection

# Database connection used to store the scraped links.
# NOTE(review): credentials are hard-coded here; move them to config/env
# before sharing or deploying this script.
db = Connection(
    'localhost',
    'spider_test',
    'root',
    'Ji10201749',
)

# Fetch the Sina news homepage.
# Renamed from `re` to avoid shadowing the stdlib `re` module.
response = requests.get("https://news.sina.com.cn/")
soup = BeautifulSoup(response.content, "html.parser")

# Insert one row per anchor tag: the visible link text as the subject
# and the href attribute as the URL.
for anchor in soup.find_all("a"):
    record = {
        # BUGFIX: the original swapped these two fields —
        # 'subject' was given the href and 'url' the link text.
        'subject': anchor.text,
        'url': anchor.get("href", ""),
    }
    db.table_insert('news_sina_spider', record)
#####