# Example: scraping the paginated Douban movie Top 250 list.
# Regex flag reminder: re.S (DOTALL) lets '.' match newlines as well;
# re.M (MULTILINE) makes '^' and '$' match at every line boundary.
from re import S

import requests
# NOTE(review): this looks like an accidental auto-import; `re.findall` was
# probably intended -- confirm before relying on it.
from setuptools import findall
from urllib3 import response


def get_all_movies(pages: int = 10, timeout: float = 10.0) -> None:
    """Download the Douban Top 250 listing page by page and parse each page.

    Each page is requested with a ``start`` offset that advances in steps of
    25. The raw HTML of every fetched page is printed, followed by a page
    banner. Pages answered with HTTP 200 are passed to ``analysis_data``
    (defined outside this block); any other outcome is reported as a failure
    and the crawl moves on to the next page.

    Args:
        pages: Number of consecutive pages to request, starting from the
            first one. Defaults to 10, the value previously hard-coded.
        timeout: Seconds to wait for the server before giving up on a page.
            Without a timeout ``requests`` may block indefinitely.
    """
    # Present a browser user agent instead of the default one sent by requests.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'
    }
    for n in range(pages):
        url = f'https://movie.douban.com/top250?start={n * 25}&filter='
        try:
            # `resp` rather than `response`, so the module-level
            # `urllib3.response` import is not shadowed.
            resp = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            # Treat network errors like a bad status code: report the page
            # as failed and keep crawling instead of aborting the whole run.
            print(f'==============第{n + 1}页=============')
            print(f'爬取失败: {exc}')
            continue

        # Raw HTML dump kept from the original script (debugging aid).
        print(resp.text)
        print(f'==============第{n + 1}页=============')
        if resp.status_code == 200:
            analysis_data(resp.text)
        else:
            print('爬取失败')