# coding: utf-8
# ----------------------------------------------------------------------------
# Purpose: scrape the news listing from autohome.com.cn — for each article,
# collect its title, summary, link URL and image URL, then download the image
# into the current directory as 1.jpg, 2.jpg, ...
# ----------------------------------------------------------------------------
# pip3 install requests
# pip3 install BeautifulSoup4
import requests
from bs4 import BeautifulSoup

NEWS_URL = 'http://www.autohome.com.cn/news/'


def fetch_news_page(url=NEWS_URL):
    """Download the news listing page and return it parsed as BeautifulSoup.

    Raises requests.HTTPError on a non-2xx response.
    """
    response = requests.get(url, timeout=10)
    response.raise_for_status()        # fail fast instead of parsing an error page
    response.encoding = 'gbk'          # the site serves GBK-encoded HTML
    return BeautifulSoup(response.text, 'html.parser')


def save_image(img_url, file_name):
    """Download *img_url* and write its raw bytes to *file_name*."""
    res = requests.get(img_url, timeout=10)
    res.raise_for_status()
    with open(file_name, 'wb') as f:
        f.write(res.content)           # binary payload: use .content, not .text


def main():
    """Scrape every news <li> and save each article image as <n>.jpg."""
    soup = fetch_news_page()
    container = soup.find(id='auto-channel-lazyload-article')
    if container is None:
        # Page layout changed or the request was blocked — nothing to scrape.
        return
    index = 1
    for li in container.find_all(name='li'):
        title = li.find('h3')          # a real news item always has an <h3> title
        if not title:
            continue                   # skip ad/placeholder <li> entries
        # Guard every lookup: any of these tags can be absent on a given <li>.
        summary_tag = li.find('p')
        link_tag = li.find('a')
        img_tag = li.find('img')
        if link_tag is None or img_tag is None:
            continue
        summary = summary_tag.text if summary_tag else ''
        url = link_tag.get('href')     # equivalent to link_tag.attrs['href']
        img = img_tag.get('src')
        if not img:
            continue
        # The site uses scheme-relative URLs ("//..."); prefix a scheme only
        # when one is actually missing.
        if img.startswith('//'):
            img = 'https:' + img
        save_image(img, '%s.jpg' % (index,))
        index += 1


if __name__ == '__main__':
    main()