zoukankan      html  css  js  c++  java
  • 三种urllib实现网页下载,含cookie模拟登陆

    # coding=UTF-8

    import re
    import urllib.request, http.cookiejar, urllib.parse
    
    #
    # print('---------------第一种方法----------------------')
    # URL = 'https://baike.baidu.com/item/%E5%B7%B4%E6%B2%99%E5%B0%94%C2%B7%E9%98%BF%E8%90%A8%E5%BE%B7/2867946?fromtitle=%E9%98%BF%E8%90%A8%E5%BE%B7&fromid=9693472'
    # response = urllib.request.urlopen(URL)
    # if response.getcode() == 200:
    #     conf = response.read()
    #     print(conf)
    # else:
    #     print('Fail')
    #
    # print('---------------第二种方法----------------------')
    # # 创建 request 对象
    # request = urllib.request.Request(URL)
    #
    # # 封装 request 对象
    # request.add_header('User-Agent', 'Mozilla/5.0')
    #
    # # 发送带头信息的请求
    # response1 = urllib.request.urlopen(request)
    # if response1.getcode() == 200:
    #     conf = response1.read()
    #     print(conf)
    # else:
    #     print('Fail')
    
    # Third method: fetch pages through an opener that stores cookies, so a
    # later login POST can reuse the cookies set by this first request.
    print('---------------第三种方法----------------------')
    # URL2 is the page fetched first; URL3 is the login endpoint, whose
    # ReturnUrl query parameter is URL2.
    URL2 = 'http://lczl.cnki.net/jbdetail/index?query=1'
    URL3 = 'http://r.cnki.net/Klogin/Login.aspx?ReturnUrl=http://lczl.cnki.net/jbdetail/index?query=1'
    # Create a CookieJar to act as the cookie container.
    cj = http.cookiejar.CookieJar()
    
    # Build an opener whose HTTPCookieProcessor reads from / writes to cj.
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
    
    # Install the opener globally so that plain urllib.request.urlopen() calls
    # below go through it (and therefore share the cookie jar).
    urllib.request.install_opener(opener)
    
    # Fetch URL2. NOTE: despite its name, request1 is the *response* object
    # returned by urlopen(), not a Request; its body is read further down.
    request1 = urllib.request.urlopen(URL2)
    
    
    def getVIEWSTATE(data):
        """Return the value of the hidden ``__VIEWSTATE`` form field in *data*.

        Args:
            data: HTML text of the page, as a ``str``.

        Returns:
            The contents of the ``value="..."`` attribute of the first
            ``name="__VIEWSTATE" id="__VIEWSTATE"`` field found.

        Raises:
            IndexError: if no such field is present in *data*.
        """
        # ``[^"]*`` stops at the attribute's closing quote. The previous greedy
        # ``.*`` ran to the LAST double quote on the line, so any further
        # attributes or tags on the same line ended up inside the token.
        match = re.search(
            r'name="__VIEWSTATE" id="__VIEWSTATE" value="([^"]*)"', data)
        if match is None:
            # Same exception type the old ``findall(...)[0]`` produced, but
            # with a message that says what is actually missing.
            raise IndexError('__VIEWSTATE field not found in page')
        return match.group(1)
    
    
    # Extract the hidden __VIEWSTATE token from the page fetched above; it is
    # posted back together with the credentials. Close the first response once
    # its body has been read so the connection is not leaked.
    try:
        VIEWSTATE = getVIEWSTATE(request1.read().decode())
    finally:
        request1.close()

    # Login form fields.
    # NOTE(review): the user name and password are hard-coded in source; they
    # should be supplied from the environment or a prompt instead of being
    # committed. Left unchanged here so the script's behavior stays the same.
    data = {
        # urlencode() percent-encodes str values itself, so the token is
        # passed as text; no manual .encode() is needed.
        '__VIEWSTATE': VIEWSTATE,
        'userName': '345666561@qq.com',
        'passWord': '215501',
        'iplogin': 0,
    }
    # urlopen() requires the POST body as bytes.
    post_data = urllib.parse.urlencode(data).encode()

    # Supplying a data argument makes this a POST request.
    request2 = urllib.request.Request(URL3, post_data)
    request2.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:55.0) Gecko/20100101 Firefox/55.0')

    # The context manager closes the response even if reading/decoding fails.
    # The request goes through the globally installed cookie-aware opener, so
    # cookies set by the server end up in cj.
    with urllib.request.urlopen(request2) as response2:
        if response2.getcode() == 200:
            conf = response2.read()
            print(conf.decode('utf8'))
            print(cj)
        else:
            print('Fail')
  • 相关阅读:
    【Java】【IDE】【Jetbrain Idea】Intellij IDEA 快捷键整理
    【Linux】【Services】【KVM】virsh命令详解
    【Linux】【Services】【KVM】安装与简单配置
    【Linux】【Services】【Docker】Docker File
    【Linux】【Services】【Docker】网络
    【Linux】【Services】【Docker】应用
    【Linux】【Services】【Docker】基础理论
    【Python】【Module】json and pickle
    【Python】【Module】hashlib
    Highcharts 对数组的要求
  • 原文地址:https://www.cnblogs.com/cenzhongman/p/7344406.html
Copyright © 2011-2022 走看看