zoukankan      html  css  js  c++  java
  • github自动登录

    最终实现代码

     1 # coding=utf-8
     2 # Version:python3.6.0
     3 # Tools:Pycharm 2017.3.2
     4 # author ="wlx"
     5 __date__ = '2018/6/14 10:37'
     6 import requests
     7 from bs4 import BeautifulSoup
     8 
     9 ret = requests.get(url="https://github.com/login")
    10 ret_cookie_dir = ret.cookies.get_dict()
    11 s1 = BeautifulSoup(ret.text, 'html.parser')
    12 token = s1.find(name='input', attrs={'name': 'authenticity_token'}).get('value')
    13 
    14 r = requests.post(
    15     url='https://github.com/session',
    16     data={
    17         'commit': 'Sign in',
    18         'utf8': '',
    19         'authenticity_token': token,
    20         'login': '792665319@qq.com',
    21         'password': '_97e68fde946b'
    22     },
    23     headers={
    24         'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36'
    25     },
    26     cookies=ret_cookie_dir
    27 )
    28 s2 = BeautifulSoup(r.text, 'html.parser')
    29 name = s2.find(name = 'strong', attrs={'class': 'css-truncate-target'}).string
    30 print('name:', name)

    所学知识

    1. 简易爬虫requests和BeautifulSoup爬取汽车之家

    # coding=utf-8
    # Version: python3.6.0
    # Tools: Pycharm 2017.3.2
    # author = "wlx"
    __date__ = '2018/6/12 21:10'
    import requests
    from bs4 import BeautifulSoup

    # Simple crawler: fetch the autohome.com.cn news page, print each article's
    # title / summary / link, and download its thumbnail image.
    ret = requests.get(url="https://www.autohome.com.cn/news/")
    # print(ret.content)  # raw binary body
    # Use the detected encoding so the Chinese text decodes correctly
    # (the page is GBK; requests would otherwise guess wrong).
    ret.encoding = ret.apparent_encoding

    soup = BeautifulSoup(ret.text, 'html.parser')  # could also use the faster 'lxml' parser
    # find() returns the FIRST matching tag; only Tag/BeautifulSoup objects
    # support further find()/find_all() calls.
    div = soup.find(name='div', id='auto-channel-lazyload-article')
    # id can be passed directly, but class must be class_='name' or
    # attrs={'class': ...} because `class` is a Python keyword.
    # find_all() returns a plain list, so keep calling find() on the items,
    # not on the list itself.
    li_list = div.find_all(name='li')
    for item in li_list:
        h3 = item.find(name='h3')
        if not h3:
            # Some <li> entries are ads/placeholders with no headline — skip them.
            continue
        print(h3.text)
        p = item.find(name='p')
        print(p.text)
        a = item.find('a')  # first positional argument of find() is the tag name
        print(a.get('href'))  # read one attribute; a.attrs would give them all
        img = item.find(name='img')
        src = img.get('src') if img else None
        if not src or '__' not in src:
            # Lazy-loaded or oddly named images have no usable src; skip them
            # instead of raising IndexError on the filename split below.
            continue
        file_name = src.rsplit('__', maxsplit=1)[1]

        # src is protocol-relative ("//..."), so prefix the scheme and fetch
        # the binary image content with a second request.
        ret_img = requests.get(url="https:" + src)
        with open(file_name, 'wb') as f:
            f.write(ret_img.content)

       2. 抽屉登录

    # coding=utf-8
    # Version:python3.6.0
    # Tools:Pycharm 2017.3.2
    # author ="wlx"
    __date__ = '2018/6/13 10:59'
    import requests
    # Browser workflow: first visit the home page, which returns an unauthorized
    # cookie; then POST the username/password while carrying that unauthorized
    # cookie. After login the site authorizes it, so the cookie handed out by the
    # very first request becomes the usable one.
    # 1) GET the site. It has an anti-crawler firewall, so send a User-Agent
    #    header to make the request look like it comes from a real browser.
    ret = requests.get(
        url="https://dig.chouti.com/",
        headers={
            'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36'
        }
    )
    ret_cookie_dict = ret.cookies.get_dict()


    # 2) POST the login form so the server authorizes the initial cookie
    response = requests.post(
        url='https://dig.chouti.com/login',
        data={
            'phone': '8618846453138',
            'password': 'we18846453138',
            'oneMonth': '1'
        },
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36'
        },
        cookies=ret_cookie_dict  # carry the first-visit cookies so they get authorized
    )
    # cookie_dict = response.cookies.get_dict()  # the cookie from this second request is NOT the useful one here
    
    # 3) Upvote a post using the now-authorized cookies from the first request.
    r1 = requests.post(
        url='https://dig.chouti.com/link/vote?linksId=20217671',
        headers={
            'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36'
        },
        cookies=ret_cookie_dict  # note the kwarg is `cookies` (plural), not `cookie`
    )
    print(r1.text)

      3. requests模块

    # Notes on the `requests` module.
    # params: query-string parameters appended to the URL.
    import requests
    # Request headers: e.g. http://www.oldboyedu.com with headers={} ...
    # Request body: data= is form-encoded internally as name=alex&age=18;
    #   json= sends a JSON string body like '{"name":"alex","age":18}'.
    # Rule of thumb: a "Form Data" panel in devtools means send data=,
    #   a "Request Payload" panel means send json=.
    requests.request(method='get', url='http://127.0.0.1:8000/test/')
    requests.request(method='post', url='http://127.0.0.1:8000/test/')
    requests.get(url='x')  # shorthand for requests.request(method='get', url='x')
    requests.post(url='x')  # shorthand for requests.request(method='post', url='x')
    # NOTE: the keyword is `cookies=` (plural); the original note's `cookie={}`
    # would raise TypeError. Both data= and json= can carry a body — see above.
    requests.get(url='http://www.oldboyedu.com', params={"nid": 1, 'name': 'x'}, headers={}, cookies={})
    # json={"name":"alex","age":18} is equivalent to data=json.dumps({...}) —
    # the json= kwarg simply does the json.dumps for you.
    requests.post(url='http://www.oldboyedu.com', params={"nid": 1, 'name': 'x'}, data={"name": "alex", "age": 18}, headers={}, cookies={})
    # params adds to the URL: the requests above go to ...?nid=1&name=x
    #
    # requests.request keyword arguments:
    #     method / url / params / data / json / headers / cookies
    #     proxies: when an IP gets banned, route through a proxy pool and pick a
    #         random proxy per request; some proxies also need authentication:
    #         auth = HTTPProxyAuth('username', 'mypassword')
    #         r = requests.get("http://www.google.com", proxies=proxyDict, auth=auth)
    #     files: upload files
    #     auth: basic authentication
    #     timeout: give up if the response takes longer than this
    #     allow_redirects: True (note spelling — original had "allow_redricts")
    #     stream: fetch a large download in chunks instead of all at once
    #         ret = requests.get('http://127.0.0.2:8000/test/', stream=True)
    #         for i in ret.iter_content():
    #             print(i)
    #         from contextlib import closing
    #         with closing(requests.get('http://httpbin.org/get', stream=True)) as r:
    #             # handle the response here
    #             for i in r.iter_content():
    #                 print(i)
    #     cert: client-side SSL certificate
    #     verify: whether to verify the SSL certificate
    # Reference: https://www.cnblogs.com/wupeiqi/articles/6283017.html
    '''
    def request(method, url, **kwargs):
        """Constructs and sends a :class:`Request <Request>`.
    
        :param method: method for the new :class:`Request` object.
        :param url: URL for the new :class:`Request` object.
        :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`.
        :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
        :param json: (optional) json data to send in the body of the :class:`Request`.
        :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
        :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`.
        :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': file-tuple}``) for multipart encoding upload.
            ``file-tuple`` can be a 2-tuple ``('filename', fileobj)``, 3-tuple ``('filename', fileobj, 'content_type')``
            or a 4-tuple ``('filename', fileobj, 'content_type', custom_headers)``, where ``'content-type'`` is a string
            defining the content type of the given file and ``custom_headers`` a dict-like object containing additional headers
            to add for the file.
        :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth.
        :param timeout: (optional) How long to wait for the server to send data
            before giving up, as a float, or a :ref:`(connect timeout, read
            timeout) <timeouts>` tuple.
        :type timeout: float or tuple
        :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed.
        :type allow_redirects: bool
        :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy.
        :param verify: (optional) whether the SSL cert will be verified. A CA_BUNDLE path can also be provided. Defaults to ``True``.
        :param stream: (optional) if ``False``, the response content will be immediately downloaded.
        :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair.
        :return: :class:`Response <Response>` object
        :rtype: requests.Response
    
        Usage::
    
          >>> import requests
          >>> req = requests.request('GET', 'http://httpbin.org/get')
          <Response [200]>
      参考:https://www.cnblogs.com/wupeiqi/articles/6283017.html     
    '''
  • 相关阅读:
    jQuery 中 attr() 和 prop() 方法的区别
    Jquery DOM元素的方法
    超链接的#和javascript:void(0)的区别
    CSS定位之position详解(转载)
    jQuery最佳实践(转载)
    jQuery官方基础教程笔记(转载)
    股票---基金基础知识
    eclipse里面构建maven项目详解(转载)
    sax解析操作XML
    DOM4j操作xml文件
  • 原文地址:https://www.cnblogs.com/wlx97e6/p/9270790.html
Copyright © 2011-2022 走看看