zoukankan html css js c++ java

python模块之requests

为什么使用requests：

支持使用Cookie保持会话

支持文件上传

支持自动确定响应内容的编码

对用户来说比较人性化

模拟get请求：

获取token

import requests

# Example: the token endpoint of the WeChat Official Account platform.
# The masked values are placeholders for your own credentials.
get_param_dict = {
    "grant_type": "**************",
    "appid": "**************",
    "secret": "**************",
}
response = requests.get(
    url='https://api.weixin.qq.com/cgi-bin/token',  # endpoint URL
    params=get_param_dict,  # sent as the URL query string
)
print(response.content.decode('utf-8'))

模拟请求头部信息

注：requests 默认发送的请求头（例如 User-Agent: python-requests/x.y.z）会暴露请求来自 Python 脚本，很多接口会因此拒绝请求或返回不同的数据，所以经常需要手动添加头部信息

 1 # get 模拟请求头部信息，（当你发现数据不对时，就模拟）
 2 # 以百度举例
 3 get_param_dict ={
 4     "wd":"newdream"
 5 }
 6 # 添加头部信息字典（可以使用抓包抓取到头部信息）
 7 header_info_dict = {
 8     "User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36",
 9     "Accpet":"text/plain, */*; q=0.01"
10 }
11 response = requests.get(url = 'https://www.baidu.com/s',
12                         params=get_param_dict,headers=header_info_dict)
13 print(response.content.decode('utf-8'))

模拟post请求

import json

import requests

# Send a POST request with requests.
# Example: the "create tag" endpoint of the WeChat Official Account platform.
# The access token is a credential: do not hard-code a real one in source.
# Obtain it from the /cgi-bin/token endpoint at run time and substitute it here.
url_param_dict = {"access_token": "**************"}
post_param_data = {
    "tag": {"name": "我是新标签"}
}
response = requests.post(
    url='https://api.weixin.qq.com/cgi-bin/tags/create',
    params=url_param_dict,
    # json=post_param_data  # alternative: pass the dict via json=
    # data= requires a string here, so the dict is serialised with json.dumps.
    data=json.dumps(post_param_data),
)
print(response.content.decode('utf-8'))

requests上传文件

import requests,os
# Upload a file with POST.
current_path = os.path.dirname(__file__)  # directory of this script
excel_path = os.path.join(current_path, '..', 'data', 'j.xlsx')  # build the path
# Open the file read-only in binary mode. The `with` block guarantees the
# handle is closed even when the request raises.
with open(excel_path, 'rb') as excel_fp:
    excel_file = {'file': excel_fp}  # form field name -> file object
    response = requests.post(
        url='https://2.python-requests.org/',  # address used by the original example
        files=excel_file,  # files= sends the file as multipart form data
    )
print(response.content.decode('utf-8'))

requests设置代理

import  requests
# Why configure a proxy?
# - crawler-style projects whose target has detection mechanisms
# - the company system may have anti-flooding protection
# - the API can only be reached through a proxy that bypasses network restrictions
proxy_server = {
    'http': 'http://127.0.0.1:8888',
    'https': 'http://127.0.0.1:8888',
}  # URL scheme -> proxy address
# Use this form instead when the proxy requires a user name and password.
proxy_user_pass = {
    'https': 'http://username:password@127.0.0.1:8888',
}
response = requests.get(
    url='https://baidu.com',
    proxies=proxy_server,  # proxies= is the keyword that enables the proxy
)
print(response.status_code)

使用timeout参数设置请求超时（time模块仅用于打印请求前后的时间戳）

如果一个请求很久没有结果，就会让整个项目的效率变得非常低，这个时候我们就需要对请求进行强制要求

让他必须在特定的时间内返回结果，否则就报错。

# 设置请求超时
import requests
import time
started_at = time.time()  # timestamp taken just before the request
print(started_at)
# timeout=3: if no response arrives within 3 seconds, a timeout error is raised.
response = requests.get(url='https://www.baidu.com', timeout=3)
finished_at = time.time()  # timestamp taken right after the request returns
print(finished_at)

retrying模块设置刷新

使用超时参数能够加快我们整体的请求速度，但是在正常的网页浏览过程中，如果发生速度很慢的情况，我们会做的选择是刷新页面

retrying模块就可以帮助我们解决这个问题：使用retrying模块提供的retry装饰器

通过装饰器的方式使用，让被装饰的函数反复执行。retry中可以传入参数stop_max_attempt_number，让函数报错后继续重新执行，

直到达到最大执行次数的上限：如果每次都报错，整个函数报错；如果中间有一次成功，程序继续往后执行。

import requests
from retrying import retry


# The call fails only when three attempts in a row raise; if any one of the
# three succeeds, the call succeeds.
@retry(stop_max_attempt_number=3)
def get_response(url):
    """Send a GET request to *url* with a 2-second timeout.

    Returns the response object produced by ``requests.get``. Retrying is
    handled by the ``retry`` decorator applied above.
    """
    return requests.get(url, timeout=2)
retrying_requests = get_response("https://www.baidu.com")
body_text = retrying_requests.content.decode()
print(body_text)

使用session设置cookie保持会话连接

好处：能够访问登录后的页面

坏处：一套cookie往往对应的是一个用户的信息，请求太频繁有更大的可能性被对方识别为爬虫
如何解决 ?使用多个账号

# 使用requests提供的session模块
import requests
# Build the form data for the login request; fill in your own account and password.
post_data = {
    "username": "xxxxx",
    "password": "xxxxx",
}
# Create the session object before sending any request. Using it as a context
# manager closes the session once the block is left, even on an exception.
with requests.session() as session:
    # Every later request goes through the session, because the session keeps
    # the user's login information (cookies) from earlier responses.
    session.post(url="https://www.baidu.com", data=post_data)
    # Request the page that is available after logging in, via the same session.
    response = session.get("https://www.baidu.com")
print(response.content.decode())

处理证书认证错误

import requests
# Option 1: silence the warnings printed for unverified HTTPS requests
# (meant to be combined with verify=False from option 2).
requests.packages.urllib3.disable_warnings()# removes the red warning output directly

# Option 2: skip certificate verification by passing verify=False. Per the
# original note this returns 200 but prints a warning; if the console shows the
# warning, also add the disable_warnings() call from option 1.
response = requests.get('https://www.12306.cn',verify=False)
print(response.content.decode('utf-8'))

# Option 3: install pyOpenSSL; per the original author the error no longer
# appears once it is installed.
# pip3 install -U requests[security]

response = requests.get('https://www.12306.cn')
print(response.content.decode('utf-8'))

# Option 4: pass a certificate. For internal company services, ask the
# developers for the xxx.crt file; the original author considers this the most
# reliable option.
# NOTE(review): cert=(crt, key) looks like a client-side certificate/key pair;
# if the goal is to trust a private CA, verify='/path/ca.crt' is probably what
# is wanted -- confirm against the requests documentation.
response = requests.get('https://www.12306.cn',cert=('/path/server.crt', '/path/key'))

requests+jsonpath解析数据

import jsonpath
import requests

hosts = 'https://api.weixin.qq.com'  # host address
# Fetch the access token.
get_param_dict = {
    "grant_type": "**********",
    "appid": "*************",
    "secret": "***************",
}
response = requests.get('%s/cgi-bin/token' % hosts, params=get_param_dict)
json_obj = response.json()  # parse the JSON response body
# JSON data extraction: pull the needed value out of a JSON document.
# Syntax: json_obj is the parsed object, `$` is the root node and
# `.access_token` is the key to read; nested levels can be reached with
# further keys, indexes or slices.
matches = jsonpath.jsonpath(json_obj, '$.access_token')
# jsonpath.jsonpath returns a list of matches, or False when nothing matched
# (for example when the API answered with an error body). Fail with a clear
# message instead of an opaque "'bool' object is not subscriptable".
if not matches:
    raise RuntimeError('access_token not found in response: %r' % (json_obj,))
token_id = matches[0]  # the result is a list, so take its first element
print(token_id)

查看全文

相关阅读:
docker入门(一)
netstat命令
 grep的小技巧
 gd库的安装
 jar命令的用法详解
 关于awk的范围模式功能问题
 更换文本中第二次出现的字符串内容
 awk打印第n个参数到最后一个技巧/将n行组成一列
 awk -f program.file 功能使用
 shell的变量处理

原文地址：https://www.cnblogs.com/feng-20/p/14624777.html