zoukankan      html  css  js  c++  java
  • Python爬虫-爬取开心网主页(有登录界面-利用cookie)

    爬取开心网主页内容

    ==========================================

    =======================================

    ===================================

     1 '''
     2 登录开心网
     3 利用cookie
     4 免除ssl
     5 '''
     6 from urllib import request, parse
     7 import ssl
     8 '''sd
     9 步骤:
    10 1, 寻找登录入口, 通过搜查相应文字可以快速定位
    11   login_url = "https://security.kaixin001.com/login/login_post.php"
    12   相应的用户名和密码对应名称为email, password
    13 2. 构造opener
    14 3. 构造login函数
    15 '''
    16 
    17 import ssl
    18 # 忽略安全问题
    19 ssl._create_default_https_context = ssl._create_unverified_context
    20 
    21 from http import cookiejar
    22 
    23 cookie = cookiejar.CookieJar()
    24 cookie_handler = request.HTTPCookieProcessor(cookie)
    25 http_handler = request.HTTPHandler()
    26 https_handler = request.HTTPSHandler()
    27 
    28 opener = request.build_opener(http_handler, https_handler, cookie_handler)
    29 
    30 
    31 
    32 def login():
    33 
    34     login_url = "https://security.kaixin001.com/login/login_post.php"
    35 
    36     data = {
    37         "email":"13119144223",
    38         "password": "123456"
    39     }
    40 
    41 
    42     # 对post的data内容进行编码
    43     data = parse.urlencode(data)
    44 
    45     # http协议的请求头
    46     headers = {
    47         "Content-Length": len(data),
    48         "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36"
    49     }
    50 
    51     # 构造请求Request对象
    52     # data要求是一个bytes对象,所以需要进行编码
    53     req = request.Request(login_url, data=data.encode(), headers=headers)
    54 
    55     rsp = opener.open(req)
    56 
    57     html = rsp.read()
    58     html = html.decode()
    59 
    60 def getHomePage():
    61     base_url = "http://www.kaixin001.com/home/?_profileuid=181697221"
    62 
    63     rsp = opener.open(base_url)
    64     html = rsp.read()
    65     html = html.decode()
    66 
    67     print(html)
    68 
    69 if __name__ == '__main__':
    70     login()
    71     getHomePage()
  • 相关阅读:
    3373=数据结构实验之查找一:二叉排序树
    3345=数据结构实验之二叉树六:哈夫曼编码
    3341=数据结构实验之二叉树二:遍历二叉树
    3340=数据结构实验之二叉树一:树的同构
    3362=数据结构实验之图论六:村村通公路
    Object处理方法返回值
    ASPCTJ
    myBatis 多对多
    初识MyBatis
    测试题
  • 原文地址:https://www.cnblogs.com/xuxaut-558/p/10086348.html
Copyright © 2011-2022 走看看