zoukankan      html  css  js  c++  java
  • requests的基本使用

    1. 简单获取一个页面信息

     1 import requests
     2 
     3 # requests库提供了get、post、put等方法,分别用于发起对应类型的HTTP请求
     4 r = requests.get('https://www.baidu.com/')
     5 print(r)
     6 print(type(r))
     7 print(r.status_code)
     8 print(type(r.text))
     9 print(r.text)
    10 print(r.cookies)

    2. get请求通过params参数传递查询参数(自动拼接到url的查询字符串中)

     1 import requests
     2 
     3 data = {
     4     'name':'dmr',
     5     'age':25,
     6 }
     7 # 将data传给参数params,会把url拼接成?key1=value1&key2=value2的形式
     8 r = requests.get(url='http://httpbin.org/get', params=data)
     9 print(r.text, r.url)
    10 # r.text返回的是JSON格式的字符串(str类型),通过json()方法解析后可以得到一个字典
    11 print(type(r.text))
    12 print(r.json())
    13 print(type(r.json()))

    3. 抓取二进制数据(如图片等)

    1 import requests
    2 
    3 r = requests.get('https://dss0.bdstatic.com/5aV1bjqh_Q23odCf/static/superman/img/logo_top-e3b63a0b1b.png')
    4 # r.text字符串格式;r.content字节格式
    5 print(r.text, r.content)
    6 print(type(r.text), type(r.content))
    7 # 保存为文件格式
    8 with open('baidu.png','wb') as f:
    9     f.write(r.content)

    4. 添加headers请求头的User-agent进行页面访问

    1 import requests
    2 
    3 r = requests.get('https://www.zhihu.com/')
    4 print(r.text)
    5 headers = {
    6     'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
    7 }
    8 r2 = requests.get('https://www.zhihu.com/', headers=headers)
    9 print(r2.text)

    5. post请求:通过data参数传入字典,以表单形式提交数据

     1 import requests
     2 
     3 data = {
     4     'name':'dmr',
     5     'age':25,
     6 }
     7 r = requests.post('http://httpbin.org/post', data=data)
     8 print(r.text)
     9 
    10 
    11 print(dir(r))
    12 print(dir(requests.codes))
    13 print(requests.codes.ok)

    6. 扩展用法

     1 # 文件上传
     2 import requests
     3 
     4 files = {'file':open('baidu.png', 'rb')}
     5 r = requests.post('http://httpbin.org/post', files=files)
     6 print(r.text)
     7 
     8 # Cookies
     9 import requests
    10 
    11 r = requests.get('https://www.baidu.com/')
    12 print(r.cookies, r.cookies.items())
    13 for key, value in r.cookies.items():
    14     print(key + '=' + value)
    15 
    16 
    17 # 设置headers的Cookie值来保持登录状态
    18 import requests
    19 
    20 headers = {
    21     'Cookie':'JSNID=2D7B55dfsdfsdfECE6E47C6AD; signature=MjEyMzJmMjk3YdsfsdfNzQzODk0YTBlNGE4MDFmYzMxNTgzOTE4MTk4NjMx3cf711642ab38b713e3e9a54f19fad0e; timeStamp=da23b847-43c0-46ab-ba78-9e8ca41a11ea; redisKey=5f948ef7-ae9a-4d67-a7f8-4211baafe92f',
    22     'User-agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
    23 }
    24 r = requests.get('http://ceshi/public/html/index.html', headers=headers)
    25 print(r.text)
    26 
    27 
    28 # 通过cookies参数key和value来保持登录状态
    29 import requests
    30 
    31 cookies = 'JSEID=2D7B5505568DCB699685BECE6E47C6AD; signare=MjEyMzJmMjk3YTDk0YTBlNGE4MDFmYzMxNTgzOTE4MTk4NjMx3cf711642ab38b713e3e9a54f19fad0e; timeStamp=da23b847-43c0-46ab-ba78-9e8ca41a11ea; redisKey=5f948ef7-ae9a-4d67-a7f8-4211baafe92f'
    32 headers = {
    33     'User-agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
    34 }
    35 jar = requests.cookies.RequestsCookieJar()
    36 for cookie in cookies.split(';'):
    37     key, value = cookie.strip().split('=')
    38     print(key, value)
    39     jar.set(key, value)
    40 r = requests.get('http://ceshi/public/html/index.html',headers=headers,cookies=jar)
    41 print(r.text)
    42 
    43 
    44 ## 保持会话,Session
    45 import requests
    46 
    47 # 给测试页面设置值
    48 r = requests.get('http://httpbin.org/cookies/set/dmr/25')
    49 # 独立进行第二次访问页面时,返回的页面内容为空
    50 r2 = requests.get('http://httpbin.org/cookies')
    51 print(r.text, r2.text)
    52 
    53 # 通过session保持会话进行二次访问
    54 s = requests.Session()
    55 # 给测试页面设置值
    56 r = s.get('http://httpbin.org/cookies/set/dmr/25')
    57 # 通过session保持会话进行二次访问时,设置的值还存在
    58 r2 = s.get('http://httpbin.org/cookies')
    59 print(r.text, r2.text)
    60 
    61 
    62 ## SSL证书验证:verify参数控制是否校验服务器证书,默认为True
    63 import requests
    64 
    65 r = requests.get('https://www.12306.cn')
    66 r2 = requests.get('https://www.12306.cn', verify=False)
    67 r3 = requests.get('https://www.12306.cn', cert=('path/cert.crt', '/path/key'))
    68 
    69 print(r.text)
    70 
    71 
    72 ## proxy,代理设置
    73 import requests
    74 
    75 # 无效的代理
    76 proxies = {
    77     'http':'http://10.0.0.100:3128',
    78     'https':'http://10.0.0.100:3168',
    79 }
    80 requests.get('https://www.taobao.com', proxies=proxies)
    81 
    82 
    83 # 超时设置
    84  import requests
    85 
    86  r = requests.get('http://www.python.org', timeout=0.5)
    87  print(r.status_code)
    88 
    89 
    90  ## 弹窗身份认证
    91  import requests
    92  from requests.auth import HTTPBasicAuth
    93 
    94  auth = HTTPBasicAuth('username', 'password')
    95  r = requests.get('http://localhost:5000', auth=auth)
    96  # 可简写,requests默认使用HTTPBasicAuth来进行认证
    97  r2 = requests.get('http://localhost:5000', auth=('username', 'password'))
    98  print(r.status_code, r2.status_code)
    View Code

    7. 将请求参数封装成Request对象,经Session预处理(prepare_request)后再发送请求

     1 from requests import Request, Session
     2 
     3 url = 'https://www.baidu.com'
     4 data = {
     5     'name': 'dmr',
     6     'age': 25
     7 }
     8 headers = {
     9     'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
    10 }
    11 # 构建Request请求对象
    12 req = Request(method='POST', url=url, data=data, headers=headers)
    13 # 生成会话
    14 s = Session()
    15 # 构建prepared数据结构请求对象
    16 prepared = s.prepare_request(req)
    17 # 发送请求
    18 r = s.send(prepared)
    19 print(r.text, r.status_code)
  • 相关阅读:
    SOA精华的内容和实用的知识
    众多SEO专家集体盛赞
    黑客大曝光:VoIP安全机密与解决方案
    博文视点大讲堂41期SEO难点之网站内部链接结构
    TransactSQL管理与开发实例精粹
    千万不要错过云计算兴起的时代
    《海量数据库解决方案》之位图索引的结构和特征
    Oracle开发艺术
    Android应用程序的开发
    BizTalk Accelerator for HL7医疗行业消息路由处理机制
  • 原文地址:https://www.cnblogs.com/Caiyundo/p/12464696.html
Copyright © 2011-2022 走看看