用my_fake_useragent生成headers
import random


def gererateUserAgent(headers_list=None):
    """Return a request-headers dict with a randomly chosen User-Agent.

    Surrounding whitespace is stripped from every candidate before one is
    picked: requests raises ``InvalidHeader`` for a header value that starts
    with a space or contains a return character.

    Args:
        headers_list: Optional iterable of User-Agent strings to choose
            from. When omitted, the list is obtained from
            ``my_fake_useragent.UserAgent().get_useragent_list()``.

    Returns:
        A dict of the form ``{'User-Agent': <string>}``.

    Raises:
        ValueError: If no non-blank User-Agent string is available.
    """
    if headers_list is None:
        # Imported here so callers that pass their own list do not need the
        # third-party package installed.
        from my_fake_useragent import UserAgent

        headers_list = UserAgent().get_useragent_list()

    # Drop empty entries and trim the whitespace that breaks header validation.
    candidates = [agent.strip() for agent in headers_list if agent and agent.strip()]
    if not candidates:
        raise ValueError("no usable User-Agent strings available")

    return {'User-Agent': random.choice(candidates)}
用生成的headers去请求网页总是一会就报错了
requests.exceptions.InvalidHeader: Invalid return character or leading space in header: user-agent
查询后发现,原因是生成的User-Agent值开头可能带有空格(或包含回车换行符),示例如下:
#错误的headers headers = { # 请求头 "user-agent":" Mozilla/5.0(Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36" } #正确的headers headers = { # 请求头 "user-agent":"Mozilla/5.0(Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36" }
为避免后续错误,根据headers_list的内容,手动整理了一个不含多余空格的User-Agent列表,供以后请求时使用
# Hand-maintained pool of User-Agent strings to pick from when building
# request headers; each entry is written without surrounding whitespace.
headers_list = [
    "Opera/7.10 (Windows NT 5.1; U) [en]",
    "Opera/9.80 (X11; Linux i686; U; it) Presto/2.5.24 Version/10.54",
    "Opera/9.64 (X11; Linux i686; U; de) Presto/2.1.1",
    "Mozilla/5.0 (Windows NT 5.2; U; ru; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 10.70",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; fr; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2 GTB7.0",
    "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-gb) AppleWebKit/85.8.5 (KHTML, like Gecko) Safari/85.8.1",
]
--------找找其他问题的时候发现一个自动headers的方法---------
import requests

url = "http://www.xicidaili.com/"


def getData(url, timeout=10):
    """Fetch *url* through a requests session and return the body as text.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up; passed
            unchanged to ``Session.get``. Without a timeout the call can
            block indefinitely.

    Returns:
        The response body decoded to ``str`` (``Response.text``).

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            timeout expires.
    """
    # These four headers are written into the session with empty values.
    # NOTE(review): presumably meant to replace the library's default
    # identifying headers -- confirm the target server accepts empty values.
    header = {
        'User-Agent': '',
        'Accept': '',
        'Accept-Encoding': '',
        'Accept-Language': '',
    }

    # The context manager closes the session, releasing its pooled
    # connections even when the request raises.
    with requests.Session() as session1:
        session1.headers.update(header)
        r = session1.get(url, timeout=timeout)
        return r.text


getData(url)
#来自https://www.cnblogs.com/smart-zihan/p/9471939.html