zoukankan      html  css  js  c++  java
  • 爬虫登录,立FLAG

    Splash Lua 脚本(向页面注入 JS,抓取验证码图片并提交给后台识别服务,然后填表登录):

    --- Splash entry point: log in to the target page using an OCR-solved captcha.
    --
    -- Flow:
    --   1. Register helper JavaScript that Splash runs on page load. It
    --      snapshots the captcha image (#checkimg) as a base64 PNG and builds
    --      a hidden form + iframe used to POST that image to the OCR service.
    --   2. Load `splash.args.url`, submit the image, and wait for the answer.
    --   3. Fill in the login form with the recognised code and click login.
    --
    -- @param splash the Splash scripting object
    -- @return table with `png` (screenshot after login), `image` (captured
    --   captcha data), `code` (recognised captcha text) and `ok` (the value
    --   returned by the login snippet, 'ok' on success)
    function main(splash)
        splash:autoload([[
    // Address of the OCR service that receives the captcha image.
    var server = 'http://192.168.7.101:8087/';

    // Last recognised captcha text; stays "0000" until the service answers.
    var DATA = "0000";
    function getCode(){
        return DATA;
    }

    var imageData = {};
    function getImageData(){
        return imageData;
    }

    // Re-draw an <img> onto a canvas and export it as a PNG data URL.
    function getBase64Image(img) {
        var canvas = document.createElement("canvas");
        canvas.width = img.width;
        canvas.height = img.height;

        var ctx = canvas.getContext("2d");
        ctx.drawImage(img, 0, 0, img.width, img.height);

        return canvas.toDataURL("image/png");
    }

    // addEventListener instead of `window.onload = ...` so that a handler
    // assigned by the page itself cannot silently replace this one.
    window.addEventListener('load', function () {
        var img = document.getElementById('checkimg');
        imageData.base64 = getBase64Image(img);

        inject(
            server,
            function(data){
                DATA = data;
            });
    });

    // Build a hidden form that POSTs to `url` into a hidden iframe, and call
    // `fn` with the iframe's window.name once the result can be read.
    function inject(url, fn){
        var element = document.createElement('form');
        element.setAttribute('id', 'formId');
        element.setAttribute('action', url);
        element.setAttribute('target', 'iframeId');
        element.setAttribute('method', 'POST');
        element.innerHTML = '<input type=text name="base64" id="base64">';
        document.body.appendChild(element);

        // `var` keeps the iframe local; it used to leak as an implicit global.
        var iframe = document.createElement('iframe');
        iframe.setAttribute('id', 'iframeId');
        iframe.style.display = 'none';
        var state = 0;
        iframe.onload = function() {
            if(state === 1) {
                var back = document.getElementById('iframeId').contentWindow.name;
                fn(back);
            } else if(state === 0) {
                state = 1;
                // Navigate the iframe to '/' on the page's own origin after
                // 3 s; the next load event then reads window.name.
                setTimeout(function(){
                    iframe.contentWindow.location = '/';
                }, 3000);
            }
        };
        document.body.appendChild(iframe);
    }

    // Copy the captured image into the hidden form and submit it.
    function parseCode() {
        document.getElementById("base64").value = imageData.base64;
        document.getElementById("formId").submit();
    }

        ]])

        assert(splash:go(splash.args.url))
        assert(splash:wait(1))
        local img = splash:evaljs("getImageData()")
        splash:evaljs("parseCode()")
        -- Must outlast the 3 s delay inside inject() plus the iframe reload.
        assert(splash:wait(4))

        -- Fetch the captcha text recognised by the OCR service.
        local verifyCode = splash:evaljs("getCode()")

        -- Pass the code as an argument rather than formatting it into the
        -- script text: OCR output is untrusted and a quote, backslash or
        -- newline in it would otherwise break (or alter) the script.
        local submit_login = splash:jsfunc([[
            function (code) {
                document.getElementById("LoginName").value = "namexxxx";
                document.getElementById("Password").value = "pwdxxxx";
                document.getElementById("CheckCode").value = code;
                document.querySelector(".__ga__switchTag_loginBtn_001").click();
                return 'ok';
            }
        ]])

        local ok = submit_login(verifyCode)
        assert(splash:wait(2))
        return {
            png = splash:png(),
            image = img,
            code = verifyCode,
            ok = ok,
        }
    end

    后台识别服务使用 Python 2(BaseHTTPServer)编写,通过 pytesseract 调用 tesseract 解析验证码:

    # -*- coding: utf_8 -*-
    
    from BaseHTTPServer import BaseHTTPRequestHandler,HTTPServer
    from os import curdir, sep
    import cgi
    import logging
    import time
    import base64
    import cStringIO
    from urlparse import urlparse, parse_qs
    try:
        import pytesseract
        from PIL import Image
    except ImportError:
        print 'http://www.lfd.uci.edu/~gohlke/pythonlibs/#pil'
        print 'http://code.google.com/p/tesseract-ocr/'
        raise SystemExit
    
    # TCP port the OCR HTTP server listens on (passed to HTTPServer below).
    PORT_NUMBER = 8087
    # NOTE(review): not referenced anywhere in the visible code -- confirm it
    # is unused before removing.
    RES_FILE_DIR = "."
    
    def decode_base64(data):
        """Decode base64, padding being optional.

        Missing trailing "=" characters are restored before decoding, so
        input whose padding was stripped still decodes.

        :param data: Base64 data as an ASCII string (text or bytes).
        :returns: The decoded byte string.
        :raises: whatever ``base64.b64decode`` raises for malformed input.
        """
        # Pad with the same string type as the input so that both text and
        # byte strings can be extended.
        pad_char = b"=" if isinstance(data, bytes) else "="
        data += pad_char * (-len(data) % 4)
        # b64decode replaces the deprecated decodestring alias, which no
        # longer exists in current Python releases.
        return base64.b64decode(data)
    
    class myHandler(BaseHTTPRequestHandler):
        """HTTP handler for the captcha OCR service.

        POST accepts a form field ``base64`` holding a PNG data URL, runs
        tesseract on the image and answers with a tiny HTML page that stores
        the recognised text in ``window.name``.
        """

        def do_GET(self):
            """Answer GET requests with a complete HTTP response.

            Nothing is served over GET: "/iframe.html" yields an empty page
            and any other path yields 404. Previously no response was sent
            at all, leaving the client with an empty reply.
            """
            if self.path=="/iframe.html":
                self.send_response(200)
                self.send_header("Content-Type", "text/html")
                self.send_header("Content-Length", "0")
                self.end_headers()
                return
            self.send_error(404)


        def do_POST(self):
            """Recognise the uploaded captcha image and return its text.

            Responds 200 with ``<script>window.name=...;</script>`` when
            tesseract returns text, 400 when no image data was posted and
            500 when recognition yields nothing.
            """
            # Local import: only needed here to quote the OCR text safely.
            import json

            logging.warning(self.headers)

            form = cgi.FieldStorage(
                fp=self.rfile,
                headers=self.headers,
                environ={'REQUEST_METHOD':'POST',
                        'CONTENT_TYPE':self.headers['Content-Type'],
                        })

            imageData = form.getvalue("base64","")
            # Drop the "data:<mime>;base64," prefix of a data URL. Only strip
            # when it is actually present instead of blindly cutting a fixed
            # number of characters.
            if imageData.startswith("data:"):
                imageData = imageData.split(",", 1)[-1]
            if not imageData:
                self.send_error(400, "missing base64 image data")
                return

            imgdata = decode_base64(imageData)
            img = Image.open(cStringIO.StringIO(imgdata))

            # "zhilian" is a whitelist config located in
            # /opt/local/share/tessdata/configs.
            vcode = pytesseract.image_to_string(img, lang="eng", config="-psm 6 zhilian")
            if(len(vcode) > 0):
                # OCR output is untrusted: emit it as a quoted, escaped JS
                # string literal, and break up any "</" so it cannot close
                # the surrounding <script> element.
                literal = json.dumps(vcode).replace("</", "<\\/")
                retString = '<script>window.name=' + literal + ';</script>'
                print(retString)
                self.send_response(200)
                self.send_header("Content-Type", "text/html")
                self.send_header("Access-control-Allow-Origin", "*")
                self.end_headers()
                self.wfile.write(retString)
            else:
                self.send_response(500)
                # Without end_headers() the header block is never terminated
                # and the client receives a malformed response.
                self.end_headers()
    
    try:
        server = HTTPServer(('', PORT_NUMBER), myHandler)
        print 'Started httpserver on port ' , PORT_NUMBER
    
        server.serve_forever()
    
    except KeyboardInterrupt:
        print '^C received, shutting down the web server'
        server.socket.close()
  • 相关阅读:
    SQL_server 将表中的记录 转换成 Insert(插入) SQL 语句
    Delphi DBGridEh导出Excel
    hdu 2018 母牛的故事
    hdu 2084 数塔
    hdu 2190 重建希望小学
    hdu 2501 Tiling_easy version
    hdu 2046 骨牌铺方格
    hdu 2045 不容易系列之(3)—— LELE的RPG难题
    高精度模板
    各种平面分割问题总结(转)
  • 原文地址:https://www.cnblogs.com/chyl411/p/6117676.html
Copyright © 2011-2022 走看看