1 import requests 2 for i in range(20): 3 r=requests.get("http://www.baidu.com") 4 s=r.text 5 f=r.content 6 d1=len(r.text) 7 d2=len(r.content) 8 print(s,f,d1,d2)
1,访问百度网页20次,结果:content
text:
长度都为2381
2,中国最好大学排名
1 import requests 2 import bs4 3 from bs4 import BeautifulSoup 4 def getHTMLText(url): 5 try: 6 r=requests.get(url,timeout=30) 7 r.raise_for_status() 8 r.encoding=r.apparent_encoding 9 return r.text 10 except: 11 return '' 12 13 def fillUnivList(ulist,html): 14 soup=BeautifulSoup(html,'html.parser') 15 for tr in soup.find('tbody').children: 16 if isinstance(tr,bs4.element.Tag): 17 tds=tr('td') 18 ulist.append([tds[0].string,tds[1].string,tds[3].string]) 19 def printUnivList(ulist,num): 20 tplt="{0:^10} {1:{3}^10} {2:^10}" 21 print(tplt.format('排名','学校名称','总分',chr(12288))) 22 for i in range(num): 23 u=ulist[i] 24 print(tplt.format(u[0],u[1],u[2],chr(12288))) 25 def main(): 26 uinfo=[] 27 url='http://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html' 28 html=getHTMLText(url) 29 fillUnivList(uinfo,html) 30 printUnivList(uinfo,10) 31 main()
结果: