GET请求
# Send a GET request with query-string parameters and print the status code,
# the final request URL and the decoded response body.
import requests

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'
}
# Passed via `params`, so requests URL-encodes it into the query string.
data = {
    'wd': '码云'
}
url = "https://www.baidu.com/s"
# NOTE(review): verify=False disables TLS certificate verification; kept from
# the original snippet, but it should not be used against untrusted networks.
# A timeout is set so an unresponsive server cannot block the script forever.
response = requests.get(url, headers=headers, params=data, verify=False, timeout=10)
print(response.status_code)
print(response.request.url)
print(response.content.decode())
POST请求
# Read a phrase from stdin, POST it to the translation endpoint and print the
# first translated result.
import json

import requests
import urllib3

# verify=False below would otherwise emit an InsecureRequestWarning per call.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
headers = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Mobile Safari/537.36'
}
query_string = input("输入你要翻译的汉字:")
# Form fields: text to translate, source language and target language.
data = {
    'query': query_string,
    'from': 'zh',
    'to': 'en',
}
url = 'https://fanyi.baidu.com/basetrans'
# A timeout is set so an unresponsive server cannot block the script forever.
response = requests.post(url=url, headers=headers, data=data, verify=False, timeout=10)

dict_ret = json.loads(response.content.decode())
try:
    ret = dict_ret['trans'][0]['dst']
except (KeyError, IndexError, TypeError):
    # The service answered with JSON that lacks the expected structure
    # (for example an error payload); report it instead of a bare traceback.
    raise SystemExit('Unexpected response from {}: {!r}'.format(url, dict_ret))
print(ret)
session登录
# Log in through a requests.Session so the cookies set by the login response
# are sent automatically with the follow-up request, then save that page.
import requests
import urllib3

# verify=False below would otherwise emit an InsecureRequestWarning per call.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

url = "https://login.m.taobao.com/login.htm"
headers = {
    'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
}
# Placeholder credentials; replace with real values before running.
data = {
    'TPL_username': "xxx",
    'TPL_password': "xxx",
}

# The context manager closes the session's pooled connections on exit.
with requests.Session() as session:
    # Timeouts keep an unresponsive server from blocking the script forever.
    session.post(url=url, headers=headers, data=data, verify=False, timeout=10)
    ret = session.get("https://h5.m.taobao.com/mlapp/olist.html", headers=headers, verify=False, timeout=10)

# The body was fully read when the request completed, so it is still
# available after the session has been closed.
with open('cart.html', 'w', encoding='utf-8') as f:
    f.write(ret.content.decode('utf-8'))
cookie登录
# Fetch a page as a logged-in user by sending a previously captured cookie in
# the request headers, then save the page to cart.html.
import requests
import urllib3

# verify=False below would otherwise emit an InsecureRequestWarning per call.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

headers = {
    'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
    # NOTE(review): hard-coded session cookie; treat it as a secret and avoid
    # committing real values to source control.
    'Cookie': 'isg=BHx8iHc6DAlbjzjWCQinbw9ITRru3SevFeYCa1b9iGdKIRyrfoXwL_KTBc9Zclj3'
}

# A timeout is set so an unresponsive server cannot block the script forever.
ret = requests.get("https://h5.m.taobao.com/mlapp/olist.html", headers=headers, verify=False, timeout=10)
with open('cart.html', 'w', encoding='utf-8') as f:
    f.write(ret.content.decode('utf-8'))
cookie生成键值对
# Convert a raw "name=value; name=value" cookie string into a dict and pass it
# to requests through the `cookies` argument, then save the page.
import requests
import urllib3

# verify=False below would otherwise emit an InsecureRequestWarning per call.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
url = 'https://h5.m.taobao.com/mlapp/olist.html'
headers = {
    'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
}

cookies = 'thw=cn; t=d04efea3a8fd36e76208d492e2af8f43; '
# Split on ';' and strip each piece so a trailing separator (as in the string
# above) produces an empty piece that is skipped instead of an IndexError.
# partition('=') splits on the first '=' only, so values that themselves
# contain '=' are kept intact.
cookies = {
    name: value
    for name, _, value in (pair.strip().partition('=') for pair in cookies.split(';'))
    if name
}

print(cookies)
# A timeout is set so an unresponsive server cannot block the script forever.
ret = requests.get(url, headers=headers, cookies=cookies, verify=False, timeout=10)
with open('cart.html', 'w', encoding='utf-8') as f:
    f.write(ret.content.decode('utf-8'))
cookiejar
# Demonstrate converting between a CookieJar and a plain dict using the
# helpers in requests.utils.
import requests
import urllib3

# verify=False below would otherwise emit an InsecureRequestWarning per call.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

url = "https://www.baidu.com/"
headers = {
    'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
}
# A timeout is set so an unresponsive server cannot block the script forever.
response = requests.get(url, headers=headers, verify=False, timeout=10)

# Convert the response's CookieJar into a dict.
cookies_dict = requests.utils.dict_from_cookiejar(response.cookies)
print(cookies_dict)
# Convert the dict back into a CookieJar:
#   cookie_dict - dict of key/values to insert into the CookieJar.
#   cookiejar   - (optional) an existing cookiejar to add the cookies to.
#   overwrite   - (optional) if False, cookies already in the jar are not
#                 replaced by the new ones.
cookies = requests.utils.cookiejar_from_dict(cookies_dict, cookiejar=None, overwrite=True)
print(cookies)

# After the conversion the jar can be assigned to a session's cookies, e.g.:
#   s = requests.Session()
#   s.cookies = cookies
#   print(s.cookies)
URL地址编码与解码
# Show percent-encoding and decoding of a URL with requests.utils, then fetch
# the original URL and print the decoded page body.
import requests
import urllib3

# verify=False below would otherwise emit an InsecureRequestWarning per call.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

headers = {
    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
}
url = 'https://www.baidu.com/s?word=人工智能'

# Percent-encode the URL and show the encoded form.
ret = requests.utils.quote(url)
print(ret)

# Decode the encoded form again and show the result.
ret = requests.utils.unquote(ret)
print(ret)

# The request itself uses the original, unencoded URL.
response = requests.get(
    url,
    timeout=5,
    headers=headers,
    verify=False,
)
print(response.content.decode())
异常重试
# Fetch a URL with automatic retries and return the decoded body, or None when
# every attempt fails.
import requests
from retrying import retry

url = "http://www.youtube.com"
headers = {
    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
}


# Give up after the third attempt; any exception raised inside the function
# counts as a failed attempt.
@retry(stop_max_attempt_number=3)
def _parse_url(urls, method, data, proxies):
    """Send one request and return the decoded response body.

    Args:
        urls: URL to request.
        method: HTTP method name; 'POST' (in any letter case) sends a POST,
            anything else sends a GET.
        data: Payload forwarded as the ``data`` argument of the request.
        proxies: Proxy mapping forwarded to requests, or None.

    Returns:
        The response body decoded to ``str``.

    Raises:
        requests.exceptions.HTTPError: If the status code is not 200.
        requests.exceptions.RequestException: On connection errors/timeouts.
    """
    # Compare case-insensitively: the public default is lowercase 'get', so a
    # caller passing 'post' must not silently fall through to a GET.
    is_post = isinstance(method, str) and method.upper() == 'POST'
    if is_post:
        response = requests.post(urls, headers=headers, data=data, timeout=3, proxies=proxies)
    else:
        # NOTE(review): the payload is sent as a request body even for GET;
        # confirm whether query parameters (``params``) were intended.
        response = requests.get(urls, headers=headers, data=data, timeout=3, proxies=proxies)
    # Raise explicitly instead of using ``assert``, which is removed when
    # Python runs with -O and would then let non-200 responses through.
    if response.status_code != 200:
        raise requests.exceptions.HTTPError(
            'unexpected status {} for {}'.format(response.status_code, urls),
            response=response,
        )
    return response.content.decode()


def parse_url(urls, method='get', data=None, proxies=None):
    """Fetch ``urls`` with retries and return its body, or None on failure.

    Args:
        urls: URL to request.
        method: HTTP method name, 'get' (default) or 'post'.
        data: Optional payload forwarded to the request.
        proxies: Optional proxy mapping; None means no explicit proxies.

    Returns:
        The decoded response body, or None if all attempts failed.
    """
    try:
        return _parse_url(urls, method, data, proxies)
    except Exception:
        # Best-effort fetch: request failures are reported as None, while
        # KeyboardInterrupt/SystemExit are no longer swallowed here.
        return None


if __name__ == '__main__':
    print(parse_url(url))