zoukankan      html  css  js  c++  java
  • 爬虫day 04(通过登录去爬虫 解决django的csrf_token)

    # Crawl a site by logging in first
    # Prerequisite: a valid username and password
    import urllib.request
    import http.cookiejar
    from lxml import etree
    # Default request headers; they are handed to makeMyOpener() and end up on
    # every request sent through the resulting opener. The User-Agent value is a
    # desktop-browser string (presumably so the server treats the script like a
    # regular browser -- confirm).
    head = {
        'Connection': 'Keep-Alive',
        'Accept': 'text/html, application/xhtml+xml, */*',
        'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko'
    }
    # Build an opener with cookie support
    def makeMyOpener(head):
        """Build a urllib opener that keeps cookies and sends the given headers.

        Args:
            head: Mapping of header name to header value.

        Returns:
            An opener whose ``addheaders`` list holds one ``(name, value)``
            tuple per entry of ``head`` and whose requests pass through an
            ``HTTPCookieProcessor`` backed by a fresh ``CookieJar``.
        """
        cookie_handler = urllib.request.HTTPCookieProcessor(http.cookiejar.CookieJar())
        opener = urllib.request.build_opener(cookie_handler)
        # addheaders expects a list of (name, value) tuples, not a dict.
        opener.addheaders = [(name, value) for name, value in head.items()]
        return opener
    # Crawl our own (locally served) page
    # Step 1: GET the login page through the cookie-aware opener and read the
    # HTML, which carries the hidden csrfmiddlewaretoken form field.
    oper = makeMyOpener(head)
    with oper.open('http://127.0.0.1:8000/index/loginHtml/', timeout=1000) as uop:
        data = uop.read()
    html = data.decode()

    # Step 2: pull the csrfmiddlewaretoken value out of the form with lxml.
    selector = etree.HTML(html)
    links = selector.xpath('//form/input[@name="csrfmiddlewaretoken"]/@value')
    if not links:
        # Without a token the POST below cannot be built; fail with a clear
        # message instead of a NameError on csrfmiddlewaretoken.
        raise RuntimeError('csrfmiddlewaretoken not found in the login form')
    for link in links:
        # If several tokens match, each is printed and the last one is used.
        csrfmiddlewaretoken = link
        print(link)

    # Step 3: POST the credentials together with the token, reusing the same
    # opener so the cookies received in step 1 are sent back.
    url = 'http://127.0.0.1:8000/index/login/'
    datas = {'csrfmiddlewaretoken':csrfmiddlewaretoken,'email':'aa','pwd':'aa'}
    # The form body must be bytes, so url-encode the dict and encode as UTF-8.
    data_encoded = urllib.parse.urlencode(datas).encode(encoding='utf-8')
    with oper.open(url, data_encoded) as response:
        content = response.read()
    html = content.decode()
    print(html)
  • 相关阅读:
    输出菱形
    合工大OJ 1359
    9.游标的使用
    8.存储过程和触发器
    css sprite---css精灵网页图片应用处理方式分析
    为什么HTML使用<!DOCTYPE HTML>
    Dom捕捉事件和冒泡事件-原理与demo测试
    html5 canvas 绘制五星红旗
    javascript实现 color颜色格式转换【 rgb和十六进制的转换】
    d3.js 根据需求定制pie图饼图
  • 原文地址:https://www.cnblogs.com/qieyu/p/7818511.html
Copyright © 2011-2022 走看看