# -*- coding: utf-8 -*-
"""Scrape a product listing table and save each row's details and photos.

For every data row in the table with class 'ListProduct', this script
writes a "<name>.txt" file containing the row's text fields and downloads
each image found in columns 1-5 to "<name><col>.jpg".
"""
__author__ = "MuT6 Sch01aR"

import io
import sys

import requests
from bs4 import BeautifulSoup

# Re-wrap stdout so the final Chinese status message prints correctly on
# consoles whose default encoding cannot represent it (e.g. Windows GBK).
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030')

BASE_URL = "http://www.baidu.com"

# Fetch the listing page; a timeout keeps the script from hanging forever
# on a stalled connection.
response = requests.get(url=BASE_URL, timeout=30)
# Trust the encoding detected from the document body over the HTTP header.
response.encoding = response.apparent_encoding

soup = BeautifulSoup(response.text, features="html.parser")
target = soup.find('table', attrs={'class': 'ListProduct'})
if target is None:
    # BUG FIX: the original crashed with an opaque AttributeError when the
    # table was missing (e.g. page layout changed); fail with a clear message.
    raise SystemExit("table with class 'ListProduct' not found")

tr_list = target.find_all("tr")
# Skip the header row; process at most 199 data rows, as before.
for row in tr_list[1:200]:
    td_list = row.find_all("td")
    name = td_list[0].text

    # Text fields of interest (columns: name, phone, info, date, wechat).
    lines = (
        "姓名:" + name,
        "电话:" + td_list[6].text,
        "信息:" + td_list[7].text,
        "报名日期:" + td_list[8].text,
        "微信号:" + td_list[9].text,
    )
    # BUG FIX: the original accidentally built a tuple and wrote its Python
    # repr to the file; write one field per line as readable UTF-8 instead.
    with open(name + ".txt", "w", encoding="utf-8") as s:
        s.write("\n".join(lines))

    # Columns 1-5 hold photos; download each one that is actually present.
    for col in range(1, 6):
        img = td_list[col].find('img')
        # BUG FIX: guard against cells with no <img> tag at all — the
        # original called .attrs on None and raised AttributeError.
        if img is None:
            continue
        src = img.attrs.get("src")
        # Skip missing or empty src attributes (original only checked "").
        if not src:
            continue
        img_response = requests.get(url=BASE_URL + "/" + src, timeout=30)
        with open(name + str(col) + ".jpg", "wb") as f:
            f.write(img_response.content)

print("++++++++++++++++爬行结束+++++++++++++++")