zoukankan      html  css  js  c++  java
  • 从0开始学爬虫10之urllib和requests库与github/api的交互

    urllib库的使用

    # coding=utf-8
    import urllib2
    import urllib
    
    # Endpoints of the httpbin-simulated test environment
    URL_IP="http://10.11.0.215:8080"
    URL_GET = "http://10.11.0.215:8080/get"
    
    
    def use_simple_urllib2():
        """Fetch URL_IP with urllib2 and print the response headers and body.

        The response object is closed explicitly once it has been read,
        even if printing fails part-way through.
        """
        response = urllib2.urlopen(URL_IP)
        try:
            print('>>>> Response Headers:')
            print(response.info())
            print('>>>>Response Body:')
            # read() yields the complete body in one call, which is what
            # joining every line from readlines() produced.
            print(response.read())
        finally:
            response.close()
    
    
    def use_params_urllib2():
        """Send a GET request with query parameters via urllib2 and print the result.

        The parameters are URL-encoded, appended to URL_GET as a query
        string, and the response headers, status code and body are printed.
        The response is closed explicitly when done.
        """
        # Build the request parameters
        params = urllib.urlencode({'param1': 'hello', 'param2': 'world'})
        print('Request Params:')
        print(params)
        # Send the request. Formatting both parts explicitly keeps any '%'
        # in the base URL from being treated as a format directive.
        response = urllib2.urlopen('%s?%s' % (URL_GET, params))
        try:
            # Handle the response
            print('>>>Response Headers:')
            print(response.info())
            print('>>>Status code')
            print(response.getcode())
            print('>>>Response Body')
            print(response.read())
        finally:
            response.close()
    
    
    if __name__ == '__main__':
        # Entry point: only the parameterised demo runs by default;
        # uncomment the two lines below to run the simple demo as well.
        # print('>>>Use simple urllib2')
        # use_simple_urllib2()
        print('>>>Use params urllib2')
        use_params_urllib2()

    requests库的简单使用

    # coding=utf-8
    
    import requests
    
    # Target URLs for the demo requests below: an /ip endpoint and a /get endpoint
    URL_IP="http://10.11.0.215:8080/ip"
    URL_GET="http://10.11.0.215:8080/get"
    
    
    def use_simple_requests():
        """Issue a plain GET to URL_IP and print headers, status code and body text."""
        resp = requests.get(URL_IP)
        # Each section is a label followed by one attribute of the response;
        # attributes are looked up lazily, in printing order.
        sections = (
            (">>>Response Headers:", "headers"),
            (">>>Response Code:", "status_code"),
            ("Response Body:", "text"),
        )
        for label, attr_name in sections:
            print(label)
            print(getattr(resp, attr_name))
    
    
    def use_params_requests():
        """Send a GET request with query parameters via requests and print the result.

        The parameters are passed through the ``params`` argument so that
        requests encodes them into the query string of URL_GET. The response
        headers, status code, reason phrase and decoded JSON body are printed.
        """
        # Build the request parameters; previously none were sent at all,
        # so this function did not demonstrate what its name says.
        params = {'param1': 'hello', 'param2': 'world'}
        response = requests.get(URL_GET, params=params)
        print(">>>Response Headers:")
        print(response.headers)
        print(">>>Response Code:")
        print(response.status_code)
        print(response.reason)
        print("Response Body:")
        print(response.json())
    
    
    if __name__ == "__main__":
        # Entry point: only the parameterised demo runs by default;
        # uncomment the two lines below to run the simple demo as well.
        # print("simple requests:")
        # use_simple_requests()
        print("params requests:")
        use_params_requests()

    requests和github api的互动

    # coding=utf-8
    import json
    import requests
    from requests import exceptions
    
    
    # Base URL that build_uri() prefixes to every endpoint path
    URL = "https://api.github.com"
    
    
    def build_uri(endpoint):
        """Return the full API path: the base URL and *endpoint* separated by '/'."""
        return URL + '/' + endpoint
    
    
    def better_print(json_str):
        """Return *json_str* re-serialised as JSON indented by 4 spaces."""
        parsed = json.loads(json_str)
        pretty = json.dumps(parsed, indent=4)
        return pretty
    
    
    def request_method():
        """Request the 'user/public_emails' endpoint with basic auth and pretty-print the body."""
        # Alternative calls for fetching user information:
        # response = requests.get(build_uri('users/reblue520'))
        # response = requests.get(build_uri('user/emails'), auth=('reblue520', 'reblue520'))
        # NOTE(review): the credentials are hard-coded in source; consider
        # loading them from the environment instead.
        credentials = ('reblue520', 'reblue520')
        target = build_uri('user/public_emails')
        response = requests.get(target, auth=credentials)
        print(better_print(response.text))
    
    
    def params_request():
        """Query the 'users' endpoint with since=11; print body, request headers and final URL."""
        query = {'since': 11}
        response = requests.get(build_uri('users'), params=query)
        print(better_print(response.text))
        print(response.request.headers)
        print(response.url)
    
    
    def json_request():
        """POST a JSON list of e-mail addresses to 'user/emails' and print the exchange.

        Prints the pretty-printed response body, the request headers, the
        request body and the response status code.
        """
        # Alternative: update the user profile (the e-mail must already be verified)
        # response = requests.patch(build_uri('user'), auth=('reblue520','reblue520'),json={'name':'hellojack2019','email':'reblue520@163.com'})
        # NOTE(review): the credentials are hard-coded in source; consider
        # loading them from the environment instead.
        credentials = ('reblue520', 'Reblue0225520')
        payload = ['hellojack2019@163.com']
        response = requests.post(build_uri('user/emails'), auth=credentials, json=payload)
        print(better_print(response.text))
        sent = response.request
        print(sent.headers)
        print(sent.body)
        print(response.status_code)
    
    
    def timeout_request():
        """Demonstrate API error handling for timeouts and HTTP error statuses.

        Requests 'user/emails' with a 10 second timeout and calls
        raise_for_status() on the response. A Timeout or HTTPError is
        printed; otherwise the status code and body text are printed.
        """
        try:
            response = requests.get(build_uri('user/emails'), timeout=10)
            response.raise_for_status()
        except (exceptions.Timeout, exceptions.HTTPError) as e:
            # Print the exception itself: the `message` attribute was
            # deprecated in Python 2.6 and does not exist in Python 3.
            print(e)
        else:
            print(response.status_code)
            print(response.text)
    
    
    def hard_requests():
        """Build a request by hand, prepare it, and send it through a Session.

        Prints the prepared request's body and headers before sending, then
        the response status code, the headers actually sent, and the
        response text. The Session is used as a context manager so it is
        closed when the function finishes, even on error.
        """
        # Customised request
        from requests import Request, Session
        headers = {'User-Agent': 'fake1.3.4'}
        # NOTE(review): the credentials are hard-coded in source; consider
        # loading them from the environment instead.
        req = Request('GET', build_uri('user/emails'), auth=('reblue520', 'Reblue0225520'), headers=headers)
        prepped = req.prepare()
        print(prepped.body)
        print(prepped.headers)
        with Session() as s:
            resp = s.send(prepped, timeout=5)
            print(resp.status_code)
            print(resp.request.headers)
            print(resp.text)
    
    
    if __name__ == '__main__':
        # Entry point: one demo runs at a time; uncomment another call to
        # try it instead.
        # request_method()
        # params_request()
        # json_request()
        # timeout_request()
        hard_requests()

     response响应的常用api

    响应的基本API
    In [1]: import requests                                                                                                                                                                                              
    
    In [2]: response = requests.get("https://api.github.com")                                                                                                                                                            
    
    In [3]: response.status_code                                                                                                                                                                                         
    Out[3]: 200
    
    In [4]: response.reason                                                                                                                                                                                              
    Out[4]: 'OK'
    
    In [5]: response.headers                                                                                                                                                                                             
    Out[5]: {'Date': 'Sat, 20 Jul 2019 03:48:51 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Transfer-Encoding': 'chunked', 'Server': 'GitHub.com', 'Status': '200 OK', 'X-RateLimit-Limit': '60', 'X-RateLimit-Remaining': '47', 'X-RateLimit-Reset': '1563598131', 'Cache-Control': 'public, max-age=60, s-maxage=60', 'Vary': 'Accept, Accept-Encoding', 'ETag': 'W/"7dc470913f1fe9bb6c7355b50a0737bc"', 'X-GitHub-Media-Type': 'github.v3; format=json', 'Access-Control-Expose-Headers': 'ETag, Link, Location, Retry-After, X-GitHub-OTP, X-RateLimit-Limit, X-RateLimit-Remaining, X-RateLimit-Reset, X-OAuth-Scopes, X-Accepted-OAuth-Scopes, X-Poll-Interval, X-GitHub-Media-Type', 'Access-Control-Allow-Origin': '*', 'Strict-Transport-Security': 'max-age=31536000; includeSubdomains; preload', 'X-Frame-Options': 'deny', 'X-Content-Type-Options': 'nosniff', 'X-XSS-Protection': '1; mode=block', 'Referrer-Policy': 'origin-when-cross-origin, strict-origin-when-cross-origin', 'Content-Security-Policy': "default-src 'none'", 'Content-Encoding': 'gzip', 'X-GitHub-Request-Id': '33D9:591B:9D084B:CF860E:5D328F23'}
    
    In [6]: response.url                                                                                                                                                                                                 
    Out[6]: 'https://api.github.com/'
    
    In [7]: response.history                                                                                                                                                                                             
    Out[7]: []
    
    In [8]: response = requests.get("http://api.github.com")                                                                                                                                                             
    
    In [9]: response.history                                                                                                                                                                                             
    Out[9]: [<Response [301]>]
    
    In [10]: response = requests.get("https://api.github.com")                                                                                                                                                           
    
    In [11]: response.elapsed                                                                                                                                                                                            
    Out[11]: datetime.timedelta(microseconds=459174)
    
    In [12]: response.request                                                                                                                                                                                            
    Out[12]: <PreparedRequest [GET]>
    
    In [13]: response.request.headers                                                                                                                                                                                    
    Out[13]: {'User-Agent': 'python-requests/2.22.0', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*', 'Connection': 'keep-alive'}
    
    In [14]: response.encoding                                                                                                                                                                                           
    Out[14]: 'utf-8'
    
    In [15]: response.raw.read(10)                                                                                                                                                                                       
    Out[15]: b''
    
    In [16]: response.content                                                                                                                                                                                            
    Out[16]: b'{"current_user_url":"https://api.github.com/user","current_user_authorizations_html_url":"https://github.com/settings/connections/applications{/client_id}","authorizations_url":"https://api.github.com/authorizations","code_search_url":"https://api.github.com/search/code?q={query}{&page,per_page,sort,order}","commit_search_url":"https://api.github.com/search/commits?q={query}{&page,per_page,sort,order}","emails_url":"https://api.github.com/user/emails","emojis_url":"https://api.github.com/emojis","events_url":"https://api.github.com/events","feeds_url":"https://api.github.com/feeds","followers_url":"https://api.github.com/user/followers","following_url":"https://api.github.com/user/following{/target}","gists_url":"https://api.github.com/gists{/gist_id}","hub_url":"https://api.github.com/hub","issue_search_url":"https://api.github.com/search/issues?q={query}{&page,per_page,sort,order}","issues_url":"https://api.github.com/issues","keys_url":"https://api.github.com/user/keys","notifications_url":"https://api.github.com/notifications","organization_repositories_url":"https://api.github.com/orgs/{org}/repos{?type,page,per_page,sort}","organization_url":"https://api.github.com/orgs/{org}","public_gists_url":"https://api.github.com/gists/public","rate_limit_url":"https://api.github.com/rate_limit","repository_url":"https://api.github.com/repos/{owner}/{repo}","repository_search_url":"https://api.github.com/search/repositories?q={query}{&page,per_page,sort,order}","current_user_repositories_url":"https://api.github.com/user/repos{?type,page,per_page,sort}","starred_url":"https://api.github.com/user/starred{/owner}{/repo}","starred_gists_url":"https://api.github.com/gists/starred","team_url":"https://api.github.com/teams","user_url":"https://api.github.com/users/{user}","user_organizations_url":"https://api.github.com/user/orgs","user_repositories_url":"https://api.github.com/users/{user}/repos{?type,page,per_page,sort}","user_search_url":"https://api.github.c
om/search/users?q={query}{&page,per_page,sort,order}"}'
    
    In [17]: response.json()                                                                                                                                                                                             
    Out[17]: 
    {'current_user_url': 'https://api.github.com/user',
     'current_user_authorizations_html_url': 'https://github.com/settings/connections/applications{/client_id}',
     'authorizations_url': 'https://api.github.com/authorizations',
     'code_search_url': 'https://api.github.com/search/code?q={query}{&page,per_page,sort,order}',
     'commit_search_url': 'https://api.github.com/search/commits?q={query}{&page,per_page,sort,order}',
     'emails_url': 'https://api.github.com/user/emails',
     'emojis_url': 'https://api.github.com/emojis',
     'events_url': 'https://api.github.com/events',
     'feeds_url': 'https://api.github.com/feeds',
     'followers_url': 'https://api.github.com/user/followers',
     'following_url': 'https://api.github.com/user/following{/target}',
     'gists_url': 'https://api.github.com/gists{/gist_id}',
     'hub_url': 'https://api.github.com/hub',
     'issue_search_url': 'https://api.github.com/search/issues?q={query}{&page,per_page,sort,order}',
     'issues_url': 'https://api.github.com/issues',
     'keys_url': 'https://api.github.com/user/keys',
     'notifications_url': 'https://api.github.com/notifications',
     'organization_repositories_url': 'https://api.github.com/orgs/{org}/repos{?type,page,per_page,sort}',
     'organization_url': 'https://api.github.com/orgs/{org}',
     'public_gists_url': 'https://api.github.com/gists/public',
     'rate_limit_url': 'https://api.github.com/rate_limit',
     'repository_url': 'https://api.github.com/repos/{owner}/{repo}',
     'repository_search_url': 'https://api.github.com/search/repositories?q={query}{&page,per_page,sort,order}',
     'current_user_repositories_url': 'https://api.github.com/user/repos{?type,page,per_page,sort}',
     'starred_url': 'https://api.github.com/user/starred{/owner}{/repo}',
     'starred_gists_url': 'https://api.github.com/gists/starred',
     'team_url': 'https://api.github.com/teams',
     'user_url': 'https://api.github.com/users/{user}',
     'user_organizations_url': 'https://api.github.com/user/orgs',
     'user_repositories_url': 'https://api.github.com/users/{user}/repos{?type,page,per_page,sort}',
     'user_search_url': 'https://api.github.com/search/users?q={query}{&page,per_page,sort,order}'}
  • 相关阅读:
    sql语句添加查询字段
    SqlServer Case when then用法总结
    单例与多线程
    HttpSession详解
    范式
    SQL语句中的Having子句与where子句
    HTTP无状态
    字节流与字符流的区别
    选择排序
    ReentrantLock VS synchronized
  • 原文地址:https://www.cnblogs.com/reblue520/p/11230814.html
Copyright © 2011-2022 走看看