...
import urllib.request
import time
from bs4 import BeautifulSoup

# Layout assumptions the pagination logic relies on: a page ends on every
# rank divisible by PAGE_SIZE, and LAST_RANK is the final entry of the list.
PAGE_SIZE = 25
LAST_RANK = 250


def url_open(url):
    """Open *url* and return the response from ``urllib.request.urlopen``.

    The caller is responsible for closing the returned response.
    """
    return urllib.request.urlopen(url)


def parse_html(response):
    """Print the ranked entries of one listing page and return the next URL.

    Reads the whole body of *response*, parses it as HTML and walks every
    ``<li>`` element. For each ``<li>`` that contains at least one
    ``<span class="title">``, the rank (text of the first ``<em>``), the
    rating (text of ``<span class="rating_num">``) and the first title are
    printed on one line.

    Returns:
        The URL of the next page (``start=<rank>``) as soon as an entry whose
        rank is a multiple of ``PAGE_SIZE`` has been printed, or ``None`` when
        the entry with rank ``LAST_RANK`` has been printed or the page holds
        no such page-ending entry.

    Raises:
        ValueError: if the rank text of an entry is not an integer.
    """
    html_content = response.read()
    html_soup = BeautifulSoup(html_content, 'html.parser', from_encoding='utf-8')

    for li in html_soup.find_all('li'):
        title = li.find_all('span', class_='title')
        if not title:
            # Not a ranked entry (navigation items etc.).
            continue

        em = li.find('em')
        rating = li.find('span', class_='rating_num')
        if em is None or rating is None:
            # find() returns None for a missing tag; skip incomplete entries
            # instead of failing with AttributeError on .get_text().
            continue

        rank = em.get_text()
        print("排名:" + rank + "------评分:" + rating.get_text() + "-------" + title[0].get_text())

        # get_text() yields a string, so the rank has to be converted before
        # it is compared with a number; comparing the raw string with 250 is
        # always False and would trigger one more request past the last page.
        rank_number = int(rank)
        if rank_number >= LAST_RANK:
            return None
        if rank_number % PAGE_SIZE == 0:
            return "https://movie.douban.com/top250?start=" + rank + "&filter="

    return None


url = "https://movie.douban.com/top250?start=0&filter="

if __name__ == '__main__':
    response = url_open(url)
    start_time = time.time()
    print("开始:" + str(start_time))

    while True:
        try:
            url = parse_html(response)
        finally:
            # Release the connection whether or not parsing succeeded.
            response.close()
        if url is None:
            break
        response = url_open(url)

    end_time = time.time()
    print("结束:" + str(end_time))
    print("一共用了:" + str(end_time - start_time) + "秒")