# Requests请求库
import requests
# -*- coding:utf8 -*-
# -*- coding:utf8 -*-
# 工程路径:3 requests请求库使用.py
# 工程日期:8/6/2019
# 工程目标:
"""
requests 7个主要方法:
requests.request(): 构造一个请求,支撑以下各方法的基础方法
requests.get(): 获取HTML网页的主要方法,对应HTTP的GET
requests.head(): 获取HTML网页头的信息方法,对应HTTP的HEAD
requests.post(): 向HTML网页提交POST请求方法,对应HTTP的POST
requests.put(): 向HTML网页提交PUT请求的方法,对应HTTP的PUT
requests.patch(): 向HTML网页提交局部修改请求,对应于HTTP的PATCH
requests.delete(): 向HTML页面提交删除请求,对应HTTP的DELETE
13个参数 requests.request(method,url,**kwargs)
method:请求方式,对应get/put/post等7种
requests.method(url, **kwargs)
url:拟获取页面的url链接
**kwargs:控制访问参数,共13个,均为可选项:
params: 字典或字节序列,作为参数增加到url中
data: 字典,字节序列或文件对象,作为Request的内容
json: JSON格式的数据,作为Request的内容
headers: 字典,HTTP定制头(模拟浏览器进行访问)
cookies: 字典或CookieJar,Request中的cookie
auth: 元组,支持HTTP认证功能
files: 字典类型,传输文件
timeout: 设定超时时间,秒为单位
proxies: 字典类型,设定访问代理服务器,可以增加登录认证
allow_redirects:True/False, 默认为True,重定向开关
stream:True/False,默认为False, 为False时立即下载响应内容,为True时推迟到访问内容时才下载
verify:True/False,默认为True, 认证SSL证书开关
cert: 本地SSL证书路径
"""
#%% Basic GET request and a first look at the response
import requests

# Fetch the Baidu home page with a plain GET.
response = requests.get('http://www.baidu.com/')
print(type(response))        # class of the object returned by requests.get
# The status code and cookies belong to this response object;
# `requests.status_codes` / `requests.cookies` are library modules.
print(response.status_code)  # HTTP status code of this response
print(response.text)         # response body as text
print(response.headers)      # response headers
print(response.cookies)      # cookies carried by this response
#%% GET with query parameters and custom headers
import requests

# Query-string parameters. requests URL-encodes keys and values itself, so a
# key must be the bare parameter name: a key such as "s?tn" would be sent as
# the literal name "s%3Ftn".
data = {
    'tn': '02003390_30_hao_pg',
    'wd': '美女',
}
# Custom request headers (for example a browser User-Agent). Left empty here,
# so the library defaults are used.
header = {}
response = requests.get('http://www.taobao.com/', params=data, headers=header)
print(response.text)
print(response.url)  # URL that was requested, including the encoded query string
#%% Parse a JSON response body
import requests
import json

response = requests.get('http://www.baidu.com/')
try:
    # Response.json() decodes the body as JSON. It raises a ValueError
    # subclass when the body is not valid JSON (an HTML page, for instance),
    # so the failure is reported instead of aborting the cell.
    print(response.json())
except ValueError:
    print('响应内容不是合法的JSON')
#%% Download binary data (an image) and save it to disk
import requests

response = requests.get('https://hbimg.huabanimg.com/6519f3b9d79be866403eb8d33ea5fa9ca5e3e5a2e40f6-Fzf6yq_fw658')
# response.content is the raw body as bytes, hence the binary write mode.
# The with-statement closes the file on exit, so no explicit close() is needed.
with open('tupian.jpg', 'wb') as f:
    f.write(response.content)
#%% Response attributes
import requests

response = requests.get('http://www.baidu.com')
# Print the attributes of the response one after another, in this order.
for attr in ('content', 'url', 'headers', 'text',
             'cookies', 'encoding', 'history', 'next'):
    print(getattr(response, attr))
#%% Check the status code
import requests

response = requests.get('http://www.baidu.com')
# Compare against the named constant from requests.codes rather than a
# literal number.
succeeded = response.status_code == requests.codes.ok
if succeeded:
    print("ok")
#%% Read cookies
import requests

response = requests.get('http://www.baidu.com')
print(response.cookies)
# One "name=value" line per cookie.
for pair in response.cookies.items():
    print('='.join(pair))
#%% 会话维持 session
#%% Certificate verification
# Most sites are served over HTTPS and need their certificate verified; a site
# whose certificate is not officially issued triggers an SSL error.
# To keep that exception from being raised, the verify argument can be set to
# False, e.g.:
#   response = requests.get('https://www.12306.cn', verify=False)
import requests

target = 'https://www.12306.cn'
response = requests.get(target)
# Status code first, then the raw body, each on its own line.
print(response.status_code, response.content, sep='\n')
#%% Proxy settings
# Declare a dict of proxies and pass it as the `proxies` argument.
import requests

# requests selects the proxy by the scheme of the target URL, so an 'https'
# entry is required for the https:// request below to go through the proxy.
# With only an 'http' entry the request would bypass the proxy.
proxies = {
    'http': 'http://127.0.0.1:1080',
    'https': 'http://127.0.0.1:1080',
}
# NOTE(review): verify=False disables TLS certificate verification; confirm it
# is really needed for this proxy demo before reusing the call elsewhere.
response = requests.get('https://www.12306.cn', verify=False, proxies=proxies)
print(response.content)
#%% Exception handling
# RequestException is the parent class of the requests exceptions and itself
# inherits from IOError.
# Its subclasses can also be caught individually: ConnectionError,
# URLRequired, TooManyRedirects, HTTPError, ConnectTimeout, ReadTimeout,
# Timeout, SSLError, ProxyError.
import requests
# ConnectionError is imported from requests on purpose: the builtin
# ConnectionError is an unrelated class, so an `except ConnectionError`
# clause would otherwise never match a connection failure raised by requests.
from requests.exceptions import ConnectionError, ReadTimeout, HTTPError, RequestException
response = requests.get('http://www.baidu.com')
try:
response = requests.get('https://www.baidu.com', timeout=0.1)
print(response.status_code)
except ReadTimeout:
print("超时错误")
except ConnectionError:
print("连接错误")
except RequestException