import requests
import time

# Sentinel returned by getHTMLText() when a request fails.
_FETCH_FAILED = "异常退出"


def getHTMLText(url):
    """Fetch *url* with HTTP GET and return the response body as text.

    The request uses a 20-second timeout. ``raise_for_status()`` converts
    HTTP 4xx/5xx responses into exceptions, so bad status codes take the
    same failure path as connection errors and timeouts.

    Args:
        url: Address to request.

    Returns:
        The decoded response text, or the sentinel string "异常退出" when
        the request fails.
    """
    try:
        r = requests.get(url, timeout=20)
        r.raise_for_status()
        # Decode with the encoding detected from the body instead of the
        # one declared in the response headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-related failures are swallowed; KeyboardInterrupt and
        # SystemExit must still be able to stop the program.
        return _FETCH_FAILED


def main():
    """Fetch the Baidu home page 100 times and print the total elapsed time."""
    url = "https://www.baidu.com"
    # perf_counter() is monotonic, so the measured duration is not affected
    # by system clock adjustments.
    start = time.perf_counter()
    for i in range(100):
        # Report the real outcome instead of always claiming success.
        if getHTMLText(url) == _FETCH_FAILED:
            print('第%d次失败' % (i + 1))
        else:
            print('第%d次成功' % (i + 1))

    end = time.perf_counter()
    print("爬取用时为: ", (end - start), "秒")


if __name__ == "__main__":
    main()
# 基本爬虫框架:注意 raise_for_status 与 try/except 的配合使用,方便检查错误。