zoukankan      html  css  js  c++  java
  • python爬虫知识点总结(四)Requests库的基本使用

    官方文档:http://docs.python-requests.org/en/master

    安装方法

      命令行下输入:pip3 install requests。详见:https://www.cnblogs.com/cthon/p/9388304.html

    一、什么是Requests?

    Requests 是用 Python 编写、基于 urllib3 的 HTTP 库,比标准库 urllib 更简洁易用。

    实例引入

    import requests
    
    response = requests.get('https://www.baidu.com')
    print(type(response))
    print(response.status_code)
    print(type(response.text))
    print(response.text)
    print(response.cookies)
    

      

    各种请求方式

    import requests
    requests.post('http://httpbin.org/post')
    requests.put('http://httpbin.org/put')
    requests.delete('http://httpbin.org/delete')
    requests.get('http://httpbin.org/get')
    requests.options('http://httpbin.org/get')
    

      

    请求

    基本GET请求

    基本写法

    import requests
    
    response = requests.get('http://httpbin.org/get')
    print(response.text)
    

      

    带参数GET请求

    import requests
    response = requests.get('http://httpbin.org/get?name=jack&age=22')
    print(response.text)
    

      

    import requests
    
    data = {
        'name':'jack',
        'age':22
    }
    response = requests.get('http://httpbin.org/get',params=data)
    print(response.text)
    

      

    解析json

    import requests
    import json
    
    response = requests.get('http://httpbin.org/get')
    print(type(response.text))
    print(response.json())
    print(json.loads(response.text))
    print(type(response.json()))
    

      

    获取二进制数据

    import requests
    
    response = requests.get('https://github.com/favicon.ico')
    print(type(response.text),type(response.content))
    print(response.text)
    print(response.content)
    

      

    import requests
    
    response = requests.get('https://www.bilibili.com/video/av24028845/?p=9')
    with open('q.avi','wb') as f:
        f.write(response.content)
        f.close()
    

      

    添加headers

    import requests
    
    response = requests.get('https://zhihu.com/explore')
    print(response.text)
    

      

    import requests
    
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36'}
    response = requests.get('https://www.zhihu.com/explore',headers=headers)
    print(response.text)
    

      

    基本POST请求

    import requests
    
    data = {'name':'jack','age':'22'}
    response = requests.post('https://httpbin.org/post',data=data)
    print(response.text)
    print(response.json())
    

      

    import requests
    
    data = {'name':'jack','age':'22'}
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36'}
    response = requests.post('https://httpbin.org/post',data=data,headers=headers)
    print(response.text)
    print(response.json())
    

      

    响应

    response属性

    import requests
    
    response = requests.get('http://www.jianshu.com')
    print(type(response.status_code),response.status_code)
    print(type(response.headers),response.headers)
    print(type(response.cookies),response.cookies)
    print(type(response.url),response.url)
    print(type(response.history),response.history)
    

      

    状态码判断

    import requests
    
    response = requests.get('http://www.cnblogs.com/cthon/p/9383778.html')
    exit() if not response.status_code == requests.codes.not_found else print('404 Not Found')
    

      

    import requests
    
    response = requests.get('http://www.cnblogs.com/cthon/p/9383778.html')
    exit() if not response.status_code == 200 else print('Request Successfully')
    

      

    状态码

    100:('continue',),
    101:('switching_protocols',),
    102:('processing',),
    103:('checkpoint',),
    122:('uri_too_long','request_uri_too_long'),
    200:('ok','okay','all_ok','all_okay','all_good','\o/','√',),
    201:('created',),
    202:('accepted',),
    203:('non_authoritative_info','non_authoritative_information'),
    204:('no_content',),
    205:('reset_content','reset',),
    206:('partial_content','partial'),
    207:('multi_status','multiple_status','multi_stati','multiple_stati'),
    208:('already_reported',),
    226:('im_used',),
        
    #Redirection
    300:('multiple_choices',),
    301:('moved_permanently','moved','\o-'),
    302:('found',),
    303:('see_other','other'),
    304:('not_modified',),
    305:('use_proxy',),
    306:('switch_proxy',),
    307:('temporary_redirect','temporary_moved','temporary'),
    308:('permanent_redirect','resume_incomplete','resume',),#These 2 to be removed in 3.0
        
    #Client Error
    400:('bad_request','bad'),
    401:('unauthorized',),
    402:('payment_required','payment'),
    403:('forbidden',),
    404:('not_found','-o-'),
    405:('method_not_allowed','not_allowed'),
    406:('not_acceptable',),
    407:('proxy_authentication_required','proxy_auth','proxy_authentication'),
    408:('request_timeout','timeout'),
    409:('conflict',),
    410:('gone',),
    411:('length_required',),
    412:('precondition_failed','precondition'),
    413:('request_entity_too_large',),
    414:('request_uri_too_large',),
    415:('unsupported_media_type','unsupported_media','media_type'),
    416:('requested_range_not_satisfiable','requested_range','range_not_satisfiable'),
    417:('expectation_failed',),
    418:('im_a_teapot','teapot','i_am_a_teapot'),
    421:('misdirected_request',),
    422:('unprocessable_entity','unprocessable'),
    423:('locked',),
    424:('failed_dependency','dependency'),
    425:('unordered_collection','unordered'),
    426:('upgrade_required','upgrade'),
    428:('precondition_required','precondition'),
    429:('too_many_requests','too_many'),
    431:('header_fields_too_large','fields_too_large'),
    444:('no_response','none'),
    449:('retry_with','retry'),
    450:('blocked_by_windows_parental_controls','parental_controls'),
    451:('unavailable_for_legal_reasons','legal_reasons'),
    499:('client_closed_request',),
    
    #Server Error
    500:('internal_server_error','server_error','/o\\','✖'),
    501:('not_implemented',),
    502:('bad_gateway',),
    503:('service_unavailable','unavailable'),
    504:('gateway_timeout',),
    505:('http_version_not_supported','http_version'),
    506:('variant_also_negotiates',),
    507:('insufficient_storage',),
    509:('bandwidth_limit_exceeded','bandwidth'),
    510:('not_extended',),
    511:('network_authentication_required','network_auth','network_authentication'),
    

      

    高级文件操作

    import requests
    
    files= {'file':open('favicon.ico','rb')}
    response = requests.post('http://httpbin.org/post',files=files)
    print(response.text)
    

      

    获取Cookie

    import requests
    
    response = requests.get('http://www.baidu.com')
    print(response.cookies)
    for key,value in response.cookies.items():
        print(key+'='+value)
    

      

    会话维持

    import requests
    
    requests.get('http://httpbin.org/cookies/set/number/123456789')
    response=requests.get('http://httpbin.org/cookies')
    print(response.text)
    

      

    import requests
    
    s = requests.Session()
    s.get('http://httpbin.org/cookies/set/number/123456789')
    response=s.get('http://httpbin.org/cookies')
    print(response.text)
    

      

    证书验证

    #12306错误证书,请求失败
    import requests
    
    response = requests.get('https://www.12306.cn/')
    print(response.status_code)
    

      

    import requests
    from requests.packages import urllib3
    urllib3.disable_warnings()
    response = requests.get('https://www.12306.cn',verify = False)
    print(response.status_code)
    

      

    import requests
    
    response = requests.get('https://www.12306.cn',cert=('/path/server.crt','/path/key'))
    print(response.status_code)
    

      

    代理设置

      http代理

    import requests
    
    proxies = {
        'http':'http://127.0.0.1:9743',
        'https':'https://127.0.0.1:9743'
    }
    response = requests.get('https://www.taobao.com',proxies=proxies)
    print(response.status_code)
    

      

    import requests
    
    proxies = {
        'http':'http://user:password@127.0.0.1:9743'
    }
    response = requests.get('https://www.taobao.com',proxies=proxies)
    print(response.status_code)
    

      

      socket代理

    pip3 install 'requests[socks]'
     
    import requests
    
    proxies = {
        'http':'socks5://127.0.0.1:9742',
        'https':'socks5://127.0.0.1:9742'
    }
    response = requests.get('https://www.taobao.com',proxies=proxies)
    print(response.status_code)
    

     

    超时设置

    import requests
    from requests.exceptions import ReadTimeout
    try:
        response = requests.get('http://www.baidu.com',timeout = 0.01)
        print(response.status_code)
    except ReadTimeout:
        print('Timeout')
    

      

    认证设置

    import requests
    from requests.auth import HTTPBasicAuth 
    
    r = requests.get('http://120.27.34.24:9001',auth=HTTPBasicAuth('user','123'))
    print(r.status_code)
    

      

    import requests
    
    r = requests.get('http://120.27.34.24:9001',auth=('user','123'))
    print(r.status_code)
    

      

    异常处理

    import requests
    from requests.exceptions import ReadTimeout,HTTPError,ConnectionError,RequestException
    
    try:
        response = requests.get('http://www.baidu.com',timeout=0.1)
        print(response.status_code)
    except ReadTimeout:
        print('Timeout')
    except HTTPError:
        print('Http error')
    except ConnectionError:
        print('Connection Error')   
    except RequestException:
        print('Error')
    

      

  • 相关阅读:
    Python 集合 深浅copy
    python基础(基础数据类型)
    python基础一
    Asp.net获取网站绝对路径的几种方法
    Ajax请求被缓存的几种处理方式
    说说字符编码
    linux学习记录
    mysql基础
    【Android开发入门】关于ListView中按钮监听器设置的解决方案
    线程同步小结
  • 原文地址:https://www.cnblogs.com/cthon/p/9398026.html
Copyright © 2011-2022 走看看