"""Scrape the first campus-news entry from news.gzcc.cn and print its details.

The script downloads the campus-news list page, walks its ``<li>`` elements
until it finds one that contains a ``.news-list-title`` node, fetches the
linked article page, prints the extracted fields and stops.  Afterwards it
prints a small ``datetime`` parsing demonstration.
"""
import re
from datetime import datetime

import requests
from bs4 import BeautifulSoup

# Seconds to wait on each HTTP request.  requests has no default timeout, so
# without this a stalled server would block the script indefinitely.
REQUEST_TIMEOUT = 10


def getclick(link):
    """Return the click count of the article at *link* as a string.

    The news id is taken from *link*: the text between the first ``_`` and
    the last ``.html`` is split on ``/`` and the second piece is used.  That
    id is sent to the oa.gzcc.cn counter endpoint, and the count is cut out
    of the response text.

    Raises:
        ValueError: if *link* contains no ``_....html`` portion.
        IndexError: if that portion contains no ``/``.
    """
    # Raw string: '\_' in a normal literal is an invalid escape sequence.
    # The dot before "html" is escaped so it only matches a literal ".".
    match = re.search(r'_(.*)\.html', link)
    if match is None:
        raise ValueError('cannot extract news id from link: {}'.format(link))
    newId = match.group(1).split('/')[1]

    click = requests.get(
        'http://oa.gzcc.cn/api.php?op=count&id={}&modelid=80'.format(newId),
        timeout=REQUEST_TIMEOUT,
    )
    # Keep whatever follows the last ".html" and strip the characters ( ' from
    # the left and ' ) ; from the right.
    # NOTE(review): assumes the response ends with something like
    # ".html('123');" -- confirm against the live endpoint.
    return click.text.split('.html')[-1].lstrip("('").rstrip("');")


res = requests.get('http://news.gzcc.cn/html/xiaoyuanxinwen/',
                   timeout=REQUEST_TIMEOUT)
res.encoding = 'utf-8'
soup = BeautifulSoup(res.text, 'html.parser')

a = soup.select('li')

for news in a:
    titles = news.select('.news-list-title')
    if not titles:
        # Not a news entry (no title node inside this <li>); keep looking.
        continue

    title = titles[0].text
    description = news.select('.news-list-description')[0].text
    link = news.a.attrs['href']

    # Fetch and parse the article page itself.
    resd = requests.get(link, timeout=REQUEST_TIMEOUT)
    resd.encoding = 'utf-8'
    soupd = BeautifulSoup(resd.text, 'html.parser')

    content = soupd.select('.show-content')[0].text
    info = soupd.select('.show-info')[0].text

    # The info line is split on whitespace: the first two tokens are joined
    # back into one "date" string, the next three are printed as-is.
    # NOTE(review): presumably publish date + time, author, reviewer and
    # source, in that order -- confirm against the page markup.
    divide = info.split()
    date = divide[0] + ' ' + divide[1]
    author = divide[2]
    auditing = divide[3]
    source = divide[4]
    clickcount = getclick(link)

    print('新闻链接:' + link)
    print('新闻标题:' + title)
    print('新闻描述:' + description)
    print('新闻正文:' + content)
    print(date)
    print(author)
    print(auditing)
    print(source)
    print('点击次数:' + clickcount + '次')

    # Only the first matching news entry is processed.
    # NOTE(review): the original indentation was lost; the break is assumed
    # to belong to the title check rather than to the bare loop body.
    break


# --- datetime demonstration: current time, then parsing a fixed string ---
now = datetime.now()
print(now)
time = '2018-04-04 14:53:25 942204'
print(type(time))
# %f consumes the trailing microsecond field ("942204").
date = datetime.strptime(time, '%Y-%m-%d %H:%M:%S %f')
print(date)
print(type(date))
截图: