用my_fake_useragent生成headers
import random
from my_fake_useragent import UserAgent
def gererateUserAgent():
    """Build a request-headers dict with a randomly chosen User-Agent.

    The candidate strings come from ``UserAgent.get_useragent_list()``.
    Each candidate is stripped of surrounding whitespace, because
    ``requests`` raises ``InvalidHeader`` for a header value that starts
    with a space or contains a return character.

    Returns:
        dict: ``{'User-Agent': <user-agent string>}``.

    Raises:
        IndexError: if no usable User-Agent string is available
            (raised by ``random.choice`` on an empty list).
    """
    # The original body referenced ``ua`` without ever creating it.
    # NOTE(review): assumes UserAgent() needs no required arguments and that
    # get_useragent_list() yields plain strings -- confirm against the
    # installed my_fake_useragent version.
    ua = UserAgent()
    candidates = [
        entry.strip()
        for entry in ua.get_useragent_list()
        if entry and entry.strip()
    ]
    return {'User-Agent': random.choice(candidates)}
用生成的headers去请求网页,运行一会儿就会报错:
requests.exceptions.InvalidHeader: Invalid return character or leading space in header: user-agent
查询后发现,原因是生成的User-Agent值开头可能带有空格,示例如下:
# Invalid headers: the value starts with a space, which requests rejects
# with InvalidHeader.
headers = {
    # request header
    "user-agent": (
        " Mozilla/5.0(Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/69.0.3497.92 Safari/537.36"
    ),
}
# Valid headers: the same value without the leading space.
headers = {
    # request header
    "user-agent": (
        "Mozilla/5.0(Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/69.0.3497.92 Safari/537.36"
    ),
}
为避免后续再出现这类错误,根据headers_list的内容手动整理了一个不含多余空格的User-Agent列表,供以后请求时使用
# Hand-picked User-Agent strings; none has leading or trailing whitespace.
headers_list = [
    "Opera/7.10 (Windows NT 5.1; U) [en]",
    "Opera/9.80 (X11; Linux i686; U; it) Presto/2.5.24 Version/10.54",
    "Opera/9.64 (X11; Linux i686; U; de) Presto/2.1.1",
    "Mozilla/5.0 (Windows NT 5.2; U; ru; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 10.70",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; fr; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2 GTB7.0",
    "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-gb) AppleWebKit/85.8.5 (KHTML, like Gecko) Safari/85.8.1",
]
--------排查其他问题时,发现了一种通过Session统一设置headers的方法---------
import requests
# Page requested by the getData(url) call at the end of this example.
url = "http://www.xicidaili.com/"
def getData(url, timeout=None):
    """Fetch ``url`` through a ``requests`` session and return the body text.

    Before the request is sent, the session's ``User-Agent``, ``Accept``,
    ``Accept-Encoding`` and ``Accept-Language`` headers are overwritten
    with empty strings.

    Args:
        url: Address to request with HTTP GET.
        timeout: Optional timeout passed straight to ``Session.get``.
            The default ``None`` keeps the original behaviour of waiting
            without a limit.

    Returns:
        str: The response body decoded as text (``Response.text``).
    """
    header = {
        'User-Agent': '',
        'Accept': '',
        'Accept-Encoding': '',
        'Accept-Language': ''
    }
    # The context manager closes the session even if get() raises; the
    # original never closed it.
    with requests.Session() as session:
        session.headers.update(header)
        response = session.get(url, timeout=timeout)
        return response.text
# Request the page once; the returned text is not stored or printed here.
getData(url)
# Source: https://www.cnblogs.com/smart-zihan/p/9471939.html