zoukankan      html  css  js  c++  java
  • python第三方库requests

    1.请求分析

    """Basic usage of requests, a friendlier HTTP client than the built-in urllib."""
    import requests
    from requests.models import Response


    # Plain GET request; requests.post / requests.delete / requests.patch are
    # called the same way.
    home_page = requests.get("https://www.baidu.com/")
    print(type(home_page))          # <class 'requests.models.Response'>
    print(home_page.status_code)    # HTTP status code, 200 means success
    body_text = home_page.text
    print(type(body_text))          # <class 'str'>
    print(body_text)                # response body decoded as text
    print(home_page.cookies)        # e.g. <RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>


    # Query parameters can be passed as a dict through params=, for example:
    # echo = requests.get("http://httpbin.org/get", params=data)
    echo = requests.get("http://httpbin.org/get")
    print(echo.headers)             # headers of the response
    # For reference, the body httpbin sends back for this request (echo.text)
    # looks like the sample below.
    """
    {
      "args": {
            # query-string parameters of the requested URL are echoed here
      }, 
      "headers": {      # headers that were sent with the request
        "Accept": "*/*", 
        "Accept-Encoding": "gzip, deflate", 
        "Connection": "close",      # connection is closed afterwards
        "Host": "httpbin.org", 
        "User-Agent": "python-requests/2.20.1"      # default value when no headers= is given
      }, 
      "origin": "223.73.146.13", 
      "url": "http://httpbin.org/get"
    }"""


    # Decode the JSON body straight into Python objects.
    json_source = requests.get("http://httpbin.org/get")
    print(type(json_source.text))   # <class 'str'>
    payload = json_source.json()    # Response.json() parses the JSON text
    print(payload)
    print(type(payload))            # <class 'dict'>
    View Code

    2.POST请求

    """POST requests and file uploads with requests."""
    import requests


    # Form POST: a dict passed as data= is sent form-encoded
    # (see the Content-Type header in the sample output below).
    data = {"name": "Jim", "age": "26"}
    res = requests.post("http://httpbin.org/post", data=data)
    print(res.text)


    """
    {
      "args": {}, 
      "data": "", 
      "files": {}, 
      "form": {         # the posted form fields
        "age": "26", 
        "name": "Jim"
      }, 
      "headers": {
        "Accept": "*/*", 
        "Accept-Encoding": "gzip, deflate", 
        "Connection": "close", 
        "Content-Length": "15", 
        "Content-Type": "application/x-www-form-urlencoded",    # type of the request payload
        "Host": "httpbin.org", 
        "User-Agent": "python-requests/2.20.1"
      }, 
      "json": null, 
      "origin": "223.73.146.12", 
      "url": "http://httpbin.org/post"
    }
    """


    # Multipart file upload. The file is opened in a with-block so the handle
    # is closed as soon as the request has been sent instead of leaking.
    with open("favicon.ico", "rb") as icon_file:
        files = {"file": icon_file}
        ren = requests.post("http://httpbin.org/post", files=files)
    print(ren.text)


    """
    {
      "args": {}, 
      "data": "", 
      "files": {            # uploaded files are reported in their own "files" field
        "file": "data:application/octet-stream;base64"
      }, 
      "form": {},           # the form is empty
      "headers": {
        "Accept": "*/*", 
        "Accept-Encoding": "gzip, deflate", 
        "Connection": "close", 
        "Content-Length": "6665", 
        # content type used for file uploads
        "Content-Type": "multipart/form-data; boundary=717f83c70b6a0c14c53c66a11897cc72", 
        "Host": "httpbin.org", 
        "User-Agent": "python-requests/2.20.1"
      }, 
      "json": null, 
      "origin": "223.73.146.12", 
      "url": "http://httpbin.org/post"
    }
    """
    View Code

    3.requests获取多媒体数据

    """Downloading binary (multimedia) content with requests."""
    import requests


    # Fetch a binary resource, here a site icon.
    icon_response = requests.get("https://github.githubassets.com/favicon.ico")
    print(icon_response.text)       # binary data decoded as text: unreadable output
    icon_bytes = icon_response.content
    print(icon_bytes)               # bytes object, displayed with a leading b
    # The with statement closes the file automatically once the block is done;
    # it works with any object that implements __enter__ and __exit__.
    with open("favicon.ico", "wb") as icon_file:
        # icon_file is the open file object; writing the raw bytes stores the
        # icon as favicon.ico in the current directory.
        icon_file.write(icon_bytes)
    View Code

    4.cookies的应用

    """Reading the cookies a server sets on a response."""
    import requests


    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3554.0 Safari/537.36",
    }
    res = requests.get("https://www.baidu.com", headers=headers)
    cookie_jar = res.cookies
    print(cookie_jar)
    # items() yields (name, value) tuples; print every cookie as name=value.
    for name, value in cookie_jar.items():
        print("=".join((name, value)))


    """
    # the jar is a RequestsCookieJar
    <RequestsCookieJar[<Cookie BAIDUID=CE10743D4DCCA7E8EAE505F60EBD3992:FG=1 for .baidu.com/>, <Cookie BIDUPSID=CE10743D4DCCA7E8EAE505F60EBD3992 for .baidu.com/>, <Cookie H_PS_PSSID=26523_1450_21091_28206_28132_27750_28140_27508 for .baidu.com/>, <Cookie PSTM=1546168944 for .baidu.com/>, <Cookie delPer=0 for .baidu.com/>, <Cookie BDSVRTM=0 for www.baidu.com/>, <Cookie BD_HOME=0 for www.baidu.com/>]>
    BAIDUID=CE10743D4DCCA7E8EAE505F60EBD3992:FG=1
    BIDUPSID=CE10743D4DCCA7E8EAE505F60EBD3992
    H_PS_PSSID=26523_1450_21091_28206_28132_27750_28140_27508
    PSTM=1546168944
    delPer=0
    BDSVRTM=0
    BD_HOME=0
    """
    
    
    """Two ways to send existing cookies to Zhihu with requests."""
    import requests
    from requests.cookies import RequestsCookieJar

    # Raw cookie string as copied from a logged-in browser session.
    # NOTE(review): this is a hard-coded session credential; real code should
    # load it from configuration rather than keep it in the source.
    cookies = 'q_c1=c5dafb4029a94bd28bab31916755a82a|1546093256000|1546093256000; r_cap_id="NzJjY2ViMWQ2MWQ2NGNmYmE5ZjJjYjA4MmI4OTlmYjM=|1546093256|4fdca7ef25aee38df8b6895fe0e6870b6c4157d7"; cap_id="NTIxY2Q0NWY2NGYzNDMwYjhmZWJiNzkxY2YxNzMxNTE=|1546093256|98ea49da057eacfa5979f20ebc86610106421e92"; l_cap_id="M2JlYTFhZmQzOWRjNDE1NzhmM2VjYjA2YWFmNGViNzI=|1546093256|321a5802717da27fc618ba74eb0dcb928d037974"; d_c0="AODhnmWpvg6PTmDEXzC_5yO9xS5KsJL3g1c=|1546093259"; _xsrf=T3PdhoFZCjirABeT1olJ9o2Ju1WH5PQP; _zap=492572c4-fbf2-48d2-9ac9-ec7ee95a9154; tgw_l7_route=931b604f0432b1e60014973b6cd4c7bc; capsion_ticket="2|1:0|10:1546169369|14:capsion_ticket|44:MWJhZDk3YTYxYTA5NDkxZjhlYWUwODRlNDliY2Y4Mjc=|a303a846033bf574b0bdd3677218f5bb49a56b00890aa86a7a9b0ac7c6b53985"; z_c0="2|1:0|10:1546169409|4:z_c0|92:Mi4xVEljZkJBQUFBQUFBNE9HZVphbS1EaVlBQUFCZ0FsVk5RZm9WWFFCdTAxTV94cHpiQ0xUV2dBRC16RmpacE4xZXp3|e24885ab0706d33abc6e7f45f2eb74769d88bc15366d945d57366efe8f5856a0"; unlock_ticket="AJBCprKETQsmAAAAYAJVTUmzKFwvSS6hhmi4BuDGUgm1G6fZBYhVXw=="; tst=r'
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3554.0 Safari/537.36"
    }

    # Approach 1 (the common one): send the raw string as a "cookie" header.
    # header_with_cookie = {
    #     "cookie": cookies,
    #     "User-Agent": headers["User-Agent"],
    # }
    # res = requests.get("https://www.zhihu.com", headers=header_with_cookie)
    # print(res.text)

    # Approach 2 (more verbose): build a RequestsCookieJar entry by entry.
    jar = RequestsCookieJar()
    for cookie in cookies.split(";"):
        print(type(cookie))     # <class 'str'>
        # Pairs are separated by "; ", so drop the surrounding whitespace;
        # otherwise every cookie name after the first starts with a space.
        cookie = cookie.strip()
        if not cookie:
            continue            # tolerate a trailing ";"
        # maxsplit=1 splits on the first "=" only, because cookie values may
        # themselves contain "=" characters.
        key, value = cookie.split("=", 1)
        jar.set(key, value)
    # Send the request with both the cookie jar and the headers.
    res = requests.get("https://www.zhihu.com", cookies=jar, headers=headers)
    print(res.text)
    View Code

    5.简单的实战应用

    """A small practical example: fetch a page with requests and parse it with re."""
    import requests
    import re


    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3554.0 Safari/537.36",
    }
    url = "https://www.zhihu.com/explore"
    # Download the page.
    page = requests.get(url, headers=headers)
    # Compile the pattern once. ".*?" is a non-greedy match (as short as
    # possible) and re.S lets "." match newlines as well.
    title_pattern = re.compile("explore-feed.*?question_link.*?>(.*?)</a>", re.S)
    # Collect the captured group of every match in the page text.
    titles = title_pattern.findall(page.text)
    print(titles)
    View Code

    6.代理设置

    """Proxy configuration (HTTP and SOCKS) with requests.

    The same calls also show timeout= and auth=. A timeout is either a single
    number or a (connect, read) tuple, because a request consists of a connect
    phase and a read phase. auth= takes a (username, password) tuple for
    login authentication.
    """
    import requests


    # Plain HTTP proxy. The proxies dict is keyed by the scheme of the target
    # URL, so an https:// request needs an "https" entry; with only an "http"
    # key the proxy would not be used for the request below.
    proxy_url = "http://user:password@192.168.1.102:3780"
    proxies = {
        "http": proxy_url,
        "https": proxy_url,
    }
    res = requests.get("https://www.taobao.com", proxies=proxies, timeout=1, auth=("username", "password"))
    print(res.text)

    # SOCKS proxy; requires the optional extra (pip install requests[socks]).
    # "host" and "port" are placeholders for the real proxy address.
    proxies1 = {
        "https": "socks5://user:password@host:port"
    }
    # The tuple form must have exactly two items, (connect, read); any other
    # length is rejected by requests with a ValueError.
    ren = requests.get("https://www.taobao.com", proxies=proxies1, timeout=(1, 2))
    print(ren.text)
    View Code

    7.会话维持

    """Keeping state across requests with a requests Session."""
    import requests


    # Use the session as a context manager so its pooled connections are
    # released when the block ends.
    with requests.Session() as ses:
        # First request: the server sets a cookie, which the session stores.
        ses.get("http://httpbin.org/cookies/set/number/1234")
        # Second request through the same session sends that cookie back, so
        # the response lists it.
        res = ses.get("http://httpbin.org/cookies")
    print(res.text)
    View Code

    8.证书验证

    """SSL certificate verification in requests.

    SSL certificates protect web traffic; TLS provides confidentiality and
    data integrity between two communicating applications.
    """
    import requests
    import urllib3
    import logging


    # Two ways to silence the warning emitted for unverified HTTPS requests:
    logging.captureWarnings(True)   # route warnings into the logging system
    urllib3.disable_warnings()      # or switch the urllib3 warnings off directly
    # verify=False skips certificate verification; without the two lines above
    # this triggers the warning shown in the sample output below.
    res = requests.get("https://www.baidu.com", verify=False)
    print(res.status_code)

    """
    # warning printed for the unverified request
    InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
    InsecureRequestWarning
    200
    """


    # Client-side certificate: cert= is either a single file containing both the
    # key and the certificate, or a tuple of two file paths. The private key
    # must be unencrypted. The paths here are placeholders.
    ren = requests.get("https://www.baidu.com", cert=("/path/server.crt", "/path/key"))
    # Report the status of the client-certificate request itself (the original
    # printed the earlier response's status here).
    print(ren.status_code)
    View Code
  • 相关阅读:
    FTP概述
    day1 基础总结
    数据库简介
    数据库基础——写在前面的话
    常用markdown语法入门
    【搬运工】——Java中的static关键字解析(转)
    【搬运工】——初识Lua(转)
    【搬运工】之YSlow安装教程
    Chome——扩展程序,获取更多扩展程序报错
    node.js 下载安装及gitbook环境安装、搭建
  • 原文地址:https://www.cnblogs.com/Guishuzhe/p/10206296.html
Copyright © 2011-2022 走看看