"""Print every article linked from the news list page at ``LIST_URL``.

For each list entry the script prints the title, time, source and link shown
on the list page, then downloads the linked article and prints its
``.show-info`` metadata line, the parsed publication time, the
author/reviewer/source/photographer fields that are present, and the text of
the ``#content`` element.
"""

import datetime
import string
import time

LIST_URL = "http://news.gzcc.cn/html/xiaoyuanxinwen/"

# Seconds to wait for the server before giving up; without a timeout
# ``requests`` can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Request headers sent with every download (browser-like user agent).
head = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
}

PUBLISH_TIME_LABEL = '发布时间:'
PUBLISH_TIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Length of a timestamp written in PUBLISH_TIME_FORMAT, e.g. "2018-04-01 11:57:00".
PUBLISH_TIME_LENGTH = 19

# Labels looked up in the article info line, in printing order:
# author, reviewer, source, photographer.
INFO_LABELS = ('作者:', '审核:', '来源:', '摄影:')


def extract_field(info, label):
    """Return the value that directly follows *label* in *info*.

    The value runs from the end of the label up to the next whitespace
    character, so it is ``""`` when whitespace follows the label immediately.

    Args:
        info: Text of the article info line.
        label: Field label including its colon, e.g. ``'作者:'``.

    Returns:
        The field value, or ``None`` when *label* does not occur in *info*.
    """
    start = info.find(label)
    if start == -1:
        return None
    # The label contains no whitespace, so the first whitespace-delimited
    # token of info[start:] always begins with the label itself.
    token = info[start:].split()[0]
    # Slice the label off by length. ``str.lstrip(label)`` would treat the
    # label as a set of characters and also eat leading characters of the
    # value that happen to appear in the label.
    return token[len(label):]


def parse_publish_time(info):
    """Parse the publication timestamp out of the article info line.

    The timestamp is taken from the text right after ``PUBLISH_TIME_LABEL``;
    when the label is absent the start of *info* is used instead.

    Args:
        info: Text of the article info line.

    Returns:
        A naive ``datetime.datetime``.

    Raises:
        ValueError: If the text does not match ``PUBLISH_TIME_FORMAT``.
    """
    _, found, after_label = info.partition(PUBLISH_TIME_LABEL)
    candidate = after_label if found else info
    stamp = candidate.strip()[:PUBLISH_TIME_LENGTH]
    return datetime.datetime.strptime(stamp, PUBLISH_TIME_FORMAT)


def fetch_soup(url):
    """Download *url* and return the page parsed with ``html.parser``.

    The response is decoded as UTF-8 regardless of what the server declares.
    """
    # Third-party packages are imported here rather than at module level so
    # that the text helpers above stay importable without them installed.
    import requests
    from bs4 import BeautifulSoup

    response = requests.get(url, headers=head, timeout=REQUEST_TIMEOUT)
    response.encoding = 'utf-8'
    return BeautifulSoup(response.text, 'html.parser')


def print_article(url):
    """Download the article at *url* and print its metadata and body text."""
    page = fetch_soup(url)
    info = page.select(".show-info")[0].text

    # Echo the info line token by token, leaving out the final token.
    for token in info.split()[:-1]:
        print(token, end=' ')
    print()
    print()

    # Convert the publication time to a datetime object.
    print("datetime类型时间:", parse_publish_time(info))
    print()

    # Author, reviewer, source, photographer: print those that are present.
    for label in INFO_LABELS:
        value = extract_field(info, label)
        if value is not None:
            print(label, value)
            print()

    # Print the article body.
    print(page.select("#content")[0].text)
    print()
    print()
    print()


def main():
    """Walk the news list page and print every entry plus its article."""
    listing = fetch_soup(LIST_URL)
    for item in listing.select('li'):
        titles = item.select(".news-list-title")
        if not titles:
            # Not a news entry (some other <li> on the page).
            continue
        entry_info = item.select(".news-list-info")[0]
        title = titles[0].text
        published = entry_info.contents[0].text
        source = entry_info.contents[1].text
        link = item.select("a")[0].attrs['href']
        print(f"标题:{title} 时间:{published} 来源:{source} 链接:{link} ")
        print()
        print()
        print()
        print_article(link)


if __name__ == "__main__":
    main()