zoukankan      html  css  js  c++  java
  • Java 验证码识别之多线程打码兔

    验证码识别,爬虫永远的话题~

    用打码兔总体的体验就是单线程速度太慢~

    import java.io.IOException;
    import java.net.MalformedURLException;
    import java.util.Date;
    
    import org.apache.log4j.Logger;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.select.Elements;
    
    import com.gargoylesoftware.htmlunit.BrowserVersion;
    import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
    import com.gargoylesoftware.htmlunit.WebClient;
    import com.gargoylesoftware.htmlunit.html.HtmlButton;
    import com.gargoylesoftware.htmlunit.html.HtmlForm;
    import com.gargoylesoftware.htmlunit.html.HtmlPage;
    import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput;
    
    import cn.smy.dama2.Dama2Web;
    import cn.smy.dama2.Dama2Web.DecodeResult;
    import cn.smy.dama2.Dama2Web.ReadBalanceResult;
    
    /**   
    * @Title: main.java 
    * @Package  
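    * @Description: Checks whether e-mail addresses are registered eBay accounts, solving captchas through the Dama2 service (single-threaded version)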
    * @author A18ccms A18ccms_gmail_com   
    * @date 2017年2月15日 下午3:42:00 
    * @version V1.0   
    */
    
    /**
     * Single-threaded checker that tests whether e-mail addresses are registered
     * eBay accounts. It drives the eBay "forgot password" page with HtmlUnit and,
     * when a captcha page is shown, has the captcha decoded by the Dama2 service.
     *
     * <p>Status codes returned by {@link #checkEbayAccount(String)}:
     * <ul>
     * <li>0 - the address is not an eBay account</li>
     * <li>1 - the address is an eBay account</li>
     * <li>2 - an unrecognised page was shown after the captcha step</li>
     * <li>3 - the check could not be completed (page failed to load, or the
     * page was neither a result page nor a captcha page)</li>
     * <li>101 - the captcha could not be decoded or was rejected</li>
     * </ul>
     *
     * @author zeze
     */
    public class main {
        private static final Logger logger = Logger.getLogger(main.class);

        // NOTE(review): service credentials are hard-coded (and redacted in this
        // listing). Move them to configuration instead of keeping them in source.
        private static Dama2Web dama2 = new Dama2Web(46****, "41c5a58de6********d23b67f61645e3a7", "***", "****");

        private static final int STATUS_NOT_ACCOUNT = 0;
        private static final int STATUS_IS_ACCOUNT = 1;
        private static final int STATUS_UNKNOWN_PAGE = 2;
        private static final int STATUS_UNDETERMINED = 3;
        private static final int STATUS_CAPTCHA_ERROR = 101;

        /** Upper bound on full re-checks after a captcha error, so a broken decoder cannot loop forever. */
        private static final int MAX_CAPTCHA_RETRIES = 5;

        private static final String START_URL = "http://fyp.ebay.com/";
        private static final String TEXT_IS_ACCOUNT = "Select how you want to reset your password";
        private static final String TEXT_NOT_ACCOUNT = "Oops, that's not a match. Try again?";
        private static final String TEXT_CAPTCHA_PAGE = "Security Measure";
        private static final String TEXT_CAPTCHA_MISMATCH = "the verification code you entered doesn't match against the image";

        /** Captcha type and timeout passed unchanged to the Dama2 client. */
        private static final int CAPTCHA_TYPE = 6;
        private static final int DECODE_TIMEOUT = 30;

        private static final long MILLIS_PER_HOUR = 1000 * 60 * 60;
        private static final long MILLIS_PER_MINUTE = 1000 * 60;
        private static final long MILLIS_PER_SECOND = 1000;

        /**
         * Checks ten sample addresses one after another and prints the outcome
         * and the elapsed time of each check.
         *
         * @param agrs unused command-line arguments
         */
        public static void main(String[] agrs) {
            for (int i = 0; i < 10; i++) {
                long startMillis = System.currentTimeMillis();

                // The first probe uses the bare address, later ones carry the index.
                String emailAccount = (i == 0) ? "asd@qq.com" : "asd" + i + "@qq.com";

                int statusCode = checkEbayAccount(emailAccount);
                for (int retry = 0; statusCode == STATUS_CAPTCHA_ERROR && retry < MAX_CAPTCHA_RETRIES; retry++) {
                    System.out.println("打码错误!");
                    statusCode = checkEbayAccount(emailAccount);
                }
                // Report the final outcome, including the one reached after retries.
                printStatus(emailAccount, statusCode);

                System.out.println(formatElapsed(System.currentTimeMillis() - startMillis));
            }
        }

        /** Prints the human-readable outcome for one checked address. */
        private static void printStatus(String emailAccount, int statusCode) {
            if (statusCode == STATUS_NOT_ACCOUNT) {
                System.out.println(emailAccount + " 该邮箱号不是ebay账号");
            } else if (statusCode == STATUS_IS_ACCOUNT) {
                System.out.println(emailAccount + " 该账号是eBay账号!");
            } else if (statusCode == STATUS_CAPTCHA_ERROR) {
                System.out.println("打码错误!");
            } else {
                System.out.println(statusCode);
            }
        }

        /**
         * Formats a duration as minutes, seconds and milliseconds. Whole hours
         * are discarded, so the minute part is always below 60.
         */
        private static String formatElapsed(long diff) {
            long min = diff % MILLIS_PER_HOUR / MILLIS_PER_MINUTE;
            long sec = diff % MILLIS_PER_MINUTE / MILLIS_PER_SECOND;
            long ms = diff % MILLIS_PER_SECOND;
            return min + "分钟" + sec + "秒" + ms + "毫秒";
        }

        /**
         * Checks whether the given e-mail address is an eBay account.
         *
         * @param emailAccount the address to look up
         * @return one of the status codes listed in the class documentation
         */
        public static int checkEbayAccount(String emailAccount) {
            System.out.println("开始验证账号:" + emailAccount);
            WebClient webClient = new WebClient(BrowserVersion.FIREFOX_17);
            try {
                return runCheck(webClient, emailAccount);
            } finally {
                // Release the windows held by this client; a new client is created per check.
                webClient.closeAllWindows();
            }
        }

        /** Runs the page flow for one address on the given client. */
        private static int runCheck(WebClient webClient, String emailAccount) {
            HtmlPage page = openStartPage(webClient);
            if (page == null) {
                // Loading failed and was logged; there is no page to work with.
                return STATUS_UNDETERMINED;
            }

            page = submitEmail(page, emailAccount);
            String text = page.asText();
            if (text.indexOf(TEXT_IS_ACCOUNT) != -1) {
                return STATUS_IS_ACCOUNT;
            }
            if (text.indexOf(TEXT_NOT_ACCOUNT) != -1) {
                return STATUS_NOT_ACCOUNT;
            }

            while (text.indexOf(TEXT_CAPTCHA_PAGE) != -1) {
                String imgUrl = Jsoup.parse(page.asXml()).getElementsByTag("iframe").attr("src");
                System.out.println("验证码图片链接:" + imgUrl);

                DecodeResult decoded = decode(imgUrl);
                if (decoded.ret < 0 || decoded.result == null) {
                    // No decode id exists for a failed request, so there is nothing to report.
                    return STATUS_CAPTCHA_ERROR;
                }

                page = submitCaptcha(page, decoded.result);
                if (page.asText().indexOf(TEXT_CAPTCHA_MISMATCH) != -1) {
                    // Report exactly the decode that was just rejected.
                    dama2.reportError(decoded.ret);
                    return STATUS_CAPTCHA_ERROR;
                }

                // Submit the address again now that the captcha has been accepted.
                page = submitEmail(page, emailAccount);
                text = page.asText();

                if (text.indexOf(TEXT_CAPTCHA_PAGE) != -1) {
                    System.out.println("提交还是验证码页面!");
                    continue;
                }

                if (text.indexOf(TEXT_NOT_ACCOUNT) != -1) {
                    return STATUS_NOT_ACCOUNT;
                } else if (text.indexOf(TEXT_IS_ACCOUNT) != -1) {
                    return STATUS_IS_ACCOUNT;
                } else {
                    System.out.println(text);
                    return STATUS_UNKNOWN_PAGE;
                }
            }
            return STATUS_UNDETERMINED;
        }

        /** Loads the start page; returns null (after logging) when loading fails. */
        private static HtmlPage openStartPage(WebClient webClient) {
            try {
                HtmlPage page = webClient.getPage(START_URL);
                return page;
            } catch (FailingHttpStatusCodeException e) {
                logger.error("Failed to load " + START_URL, e);
            } catch (IOException e) {
                // Also covers MalformedURLException, which is an IOException.
                logger.error("Failed to load " + START_URL, e);
            }
            return null;
        }

        /**
         * Fills the address into the second form of the page and clicks its first
         * button. Returns the resulting page, or the unchanged page when the
         * click fails with an I/O error.
         */
        private static HtmlPage submitEmail(HtmlPage page, String emailAccount) {
            HtmlForm form = page.getForms().get(1);
            form.getInputByName("input").setValueAttribute(emailAccount);
            HtmlButton button = (HtmlButton) form.getElementsByTagName("button").get(0);
            try {
                HtmlPage next = button.click();
                return next;
            } catch (IOException e) {
                logger.error("Failed to submit e-mail " + emailAccount, e);
                return page;
            }
        }

        /**
         * Types the decoded captcha into the first form of the page and clicks
         * the submit input. Returns the resulting page, or the unchanged page
         * when the click fails with an I/O error.
         */
        private static HtmlPage submitCaptcha(HtmlPage page, String code) {
            HtmlForm form = page.getForms().get(0);
            form.getInputByName("tokenText").setValueAttribute(code);
            // The submit control is the sixth <input> of the captcha form.
            HtmlSubmitInput input = (HtmlSubmitInput) form.getElementsByTagName("input").get(5);
            try {
                HtmlPage next = input.click();
                return next;
            } catch (IOException e) {
                logger.error("Failed to submit captcha", e);
                return page;
            }
        }

        /** Sends the captcha URL to Dama2, logs the outcome and returns the raw result. */
        private static DecodeResult decode(String imgUrl) {
            // NOTE(review): the balance is fetched but not used; the call is kept
            // because it is unclear whether the client depends on it - confirm and drop.
            dama2.getBalance();
            DecodeResult res = dama2.decodeUrlAndGetResult(imgUrl, CAPTCHA_TYPE, DECODE_TIMEOUT);
            if (res.ret >= 0) {
                System.out.println("success: result=" + res.result + "; id=" + res.ret);
            } else {
                System.err.println("failed: ret = " + res.ret + "; desc=" + res.desc);
            }
            return res;
        }

        /**
         * Decodes the captcha behind the given URL through Dama2.
         *
         * @param imgUrl URL of the captcha to decode
         * @return the decoded text as returned by the service; may be null when
         *         decoding failed
         */
        public static String getCode(String imgUrl) {
            return decode(imgUrl).result;
        }

    }

     测试结果如下:

    用多线程测试,明显快多了

    package test;
    
    import java.io.IOException;
    import java.net.MalformedURLException;
    import java.util.ArrayList;
    import java.util.Date;
    import java.util.concurrent.Callable;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;
    
    import org.apache.log4j.Logger;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.select.Elements;
    
    import com.gargoylesoftware.htmlunit.BrowserVersion;
    import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
    import com.gargoylesoftware.htmlunit.WebClient;
    import com.gargoylesoftware.htmlunit.html.HtmlButton;
    import com.gargoylesoftware.htmlunit.html.HtmlForm;
    import com.gargoylesoftware.htmlunit.html.HtmlPage;
    import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput;
    
    import cn.smy.dama2.Dama2Web;
    import cn.smy.dama2.Dama2Web.DecodeResult;
    import cn.smy.dama2.Dama2Web.ReadBalanceResult;
    
    /**
     * Multi-threaded driver: submits one {@link CheckEbayAccount} task per sample
     * address to a fixed thread pool, waits for all of them and prints the total
     * elapsed time.
     *
     * @author zeze
     */
    public class EbayMultiplyThreadCheck {

        /** Number of worker threads and, at the same time, of addresses checked. */
        private static int threadNum = 30;

        private static final long MILLIS_PER_HOUR = 1000 * 60 * 60;
        private static final long MILLIS_PER_MINUTE = 1000 * 60;
        private static final long MILLIS_PER_SECOND = 1000;

        /**
         * Runs all checks in parallel and prints how long the whole batch took.
         *
         * @param args unused command-line arguments
         */
        public static void main(String[] args) {
            long startMillis = System.currentTimeMillis();

            ExecutorService exec = Executors.newFixedThreadPool(threadNum);
            try {
                ArrayList<Future<Integer>> results = new ArrayList<Future<Integer>>(threadNum);
                for (int i = 0; i < threadNum; i++) {
                    // The first probe uses the bare address, later ones carry the index.
                    String email = (i == 0) ? "asd@qq.com" : "asd" + i + "@qq.com";
                    results.add(exec.submit(new CheckEbayAccount(email)));
                }

                // Block on each future instead of polling once per second: this
                // keeps the measured time exact and surfaces task failures.
                for (Future<Integer> future : results) {
                    try {
                        future.get();
                    } catch (java.util.concurrent.ExecutionException e) {
                        System.err.println("check task failed: " + e.getCause());
                    } catch (InterruptedException e) {
                        // Stop waiting, cancel outstanding work and keep the interrupt flag set.
                        exec.shutdownNow();
                        Thread.currentThread().interrupt();
                        break;
                    }
                }
            } finally {
                exec.shutdown();
            }

            System.out.println(formatElapsed(System.currentTimeMillis() - startMillis));
        }

        /**
         * Formats a duration as minutes, seconds and milliseconds. Whole hours
         * are discarded, so the minute part is always below 60.
         */
        private static String formatElapsed(long diff) {
            long min = diff % MILLIS_PER_HOUR / MILLIS_PER_MINUTE;
            long sec = diff % MILLIS_PER_MINUTE / MILLIS_PER_SECOND;
            long ms = diff % MILLIS_PER_SECOND;
            return min + "分钟" + sec + "秒" + ms + "毫秒";
        }
    }
    
    /**
     * Task that checks whether one e-mail address is a registered eBay account.
     * It drives the eBay "forgot password" page with HtmlUnit and, when a captcha
     * page is shown, has the captcha decoded by the Dama2 service.
     *
     * <p>{@link #call()} returns:
     * <ul>
     * <li>0 - the address is not an eBay account</li>
     * <li>1 - the address is an eBay account</li>
     * <li>2 - an unrecognised page was shown after the captcha step</li>
     * <li>3 - the check could not be completed (page failed to load, captcha
     * decoding gave up, or the page was neither a result nor a captcha page)</li>
     * </ul>
     */
    class CheckEbayAccount implements Callable<Integer> {

        private static final int STATUS_NOT_ACCOUNT = 0;
        private static final int STATUS_IS_ACCOUNT = 1;
        private static final int STATUS_UNKNOWN_PAGE = 2;
        private static final int STATUS_UNDETERMINED = 3;

        /** Upper bound on decode requests per captcha, so a failing service cannot loop forever. */
        private static final int MAX_DECODE_ATTEMPTS = 5;

        /** Captcha type and timeout passed unchanged to the Dama2 client. */
        private static final int CAPTCHA_TYPE = 6;
        private static final int DECODE_TIMEOUT = 30;

        private static final String START_URL = "http://fyp.ebay.com/";
        private static final String TEXT_IS_ACCOUNT = "Select how you want to reset your password";
        private static final String TEXT_NOT_ACCOUNT = "Oops, that's not a match. Try again?";
        private static final String TEXT_CAPTCHA_PAGE = "Security Measure";
        private static final String TEXT_CAPTCHA_REJECTED = "Sorry";

        private static final Logger logger = Logger.getLogger(CheckEbayAccount.class);

        // NOTE(review): service credentials are hard-coded (and redacted in this
        // listing). Move them to configuration instead of keeping them in source.
        // NOTE(review): this single client is shared by all worker threads -
        // confirm that Dama2Web is safe for concurrent use.
        private static Dama2Web dama2 = new Dama2Web(****, "41c5a58de68ebe2*******", "***", "****");

        private final String email;

        /**
         * @param email the address this task will look up
         */
        public CheckEbayAccount(String email) {
            this.email = email;
        }

        /**
         * Performs the check for this task's address.
         *
         * @return one of the status codes listed in the class documentation
         */
        @Override
        public Integer call() {
            String worker = Thread.currentThread().getName();
            System.out.println(worker + " 开始验证账号:" + email);
            WebClient webClient = new WebClient(BrowserVersion.FIREFOX_17);
            try {
                return Integer.valueOf(check(webClient, worker));
            } finally {
                // Release the windows held by this client; each task creates its own client.
                webClient.closeAllWindows();
            }
        }

        /** Runs the page flow for this task's address on the given client. */
        private int check(WebClient webClient, String worker) {
            HtmlPage page = openStartPage(webClient);
            if (page == null) {
                // Loading failed and was logged; there is no page to work with.
                return STATUS_UNDETERMINED;
            }

            page = submitEmail(page);
            String text = page.asText();
            if (text.indexOf(TEXT_IS_ACCOUNT) != -1) {
                System.out.println(worker + " " + email + " 该账号是eBay账号!");
                return STATUS_IS_ACCOUNT;
            } else if (text.indexOf(TEXT_NOT_ACCOUNT) != -1) {
                System.out.println(worker + " " + email + " 该邮箱号不是ebay账号");
                return STATUS_NOT_ACCOUNT;
            }

            while (text.indexOf(TEXT_CAPTCHA_PAGE) != -1) {
                page = solveCaptcha(page, worker);
                if (page == null) {
                    return STATUS_UNDETERMINED;
                }

                // Submit the address again now that the captcha has been accepted.
                page = submitEmail(page);
                text = page.asText();

                if (text.indexOf(TEXT_CAPTCHA_PAGE) != -1) {
                    System.out.println(worker + " 提交还是验证码页面!");
                    continue;
                }

                if (text.indexOf(TEXT_NOT_ACCOUNT) != -1) {
                    System.out.println(worker + " " + email + " 该邮箱号不是ebay账号");
                    return STATUS_NOT_ACCOUNT;
                } else if (text.indexOf(TEXT_IS_ACCOUNT) != -1) {
                    System.out.println(worker + " " + email + " 该账号是eBay账号!");
                    return STATUS_IS_ACCOUNT;
                } else {
                    System.out.println(worker + " " + text);
                    return STATUS_UNKNOWN_PAGE;
                }
            }
            System.out.println(worker + " " + text);
            return STATUS_UNDETERMINED;
        }

        /**
         * Decodes and submits the captcha of the given page, repeating while the
         * site rejects the answer. Each rejected answer is reported to Dama2
         * using the id of that very decode. Returns the page shown after an
         * accepted answer, or null when decoding gave up.
         */
        private HtmlPage solveCaptcha(HtmlPage captchaPage, String worker) {
            HtmlPage page = captchaPage;
            DecodeResult decoded = null;
            do {
                if (decoded != null) {
                    // A previous answer was rejected by the site.
                    System.out.println(worker + " 打码错误!重试");
                    dama2.reportError(decoded.ret);
                }

                String imgUrl = Jsoup.parse(page.asXml()).getElementsByTag("iframe").attr("src");
                System.out.println(worker + " " + "验证码图片链接:" + imgUrl);

                decoded = decode(imgUrl);
                if (isFailed(decoded)) {
                    logger.error("Captcha decoding gave up for " + email + ": ret=" + decoded.ret);
                    return null;
                }
                page = submitCaptcha(page, decoded.result);
            } while (page.asText().indexOf(TEXT_CAPTCHA_REJECTED) != -1);
            return page;
        }

        /** Loads the start page; returns null (after logging) when loading fails. */
        private static HtmlPage openStartPage(WebClient webClient) {
            try {
                HtmlPage page = webClient.getPage(START_URL);
                return page;
            } catch (FailingHttpStatusCodeException e) {
                logger.error("Failed to load " + START_URL, e);
            } catch (IOException e) {
                // Also covers MalformedURLException, which is an IOException.
                logger.error("Failed to load " + START_URL, e);
            }
            return null;
        }

        /**
         * Fills the address into the second form of the page and clicks its first
         * button. Returns the resulting page, or the unchanged page when the
         * click fails with an I/O error.
         */
        private HtmlPage submitEmail(HtmlPage page) {
            HtmlForm form = page.getForms().get(1);
            form.getInputByName("input").setValueAttribute(email);
            HtmlButton button = (HtmlButton) form.getElementsByTagName("button").get(0);
            try {
                HtmlPage next = button.click();
                return next;
            } catch (IOException e) {
                logger.error("Failed to submit e-mail " + email, e);
                return page;
            }
        }

        /**
         * Types the decoded captcha into the first form of the page and clicks
         * the submit input. Returns the resulting page, or the unchanged page
         * when the click fails with an I/O error.
         */
        private static HtmlPage submitCaptcha(HtmlPage page, String code) {
            HtmlForm form = page.getForms().get(0);
            form.getInputByName("tokenText").setValueAttribute(code);
            // The submit control is the sixth <input> of the captcha form.
            HtmlSubmitInput input = (HtmlSubmitInput) form.getElementsByTagName("input").get(5);
            try {
                HtmlPage next = input.click();
                return next;
            } catch (IOException e) {
                logger.error("Failed to submit captcha", e);
                return page;
            }
        }

        /** A decode counts as failed when the service returned an error code or no text. */
        private static boolean isFailed(DecodeResult res) {
            return res.ret < 0 || res.result == null;
        }

        /**
         * Sends the captcha URL to Dama2, retrying failed requests up to
         * {@link #MAX_DECODE_ATTEMPTS} times in total. The result is returned to
         * the caller rather than stored in shared state, so concurrent tasks
         * cannot mix up each other's decode ids.
         */
        private static DecodeResult decode(String imgUrl) {
            String worker = Thread.currentThread().getName();
            ReadBalanceResult balanceResult = dama2.getBalance();
            DecodeResult res = dama2.decodeUrlAndGetResult(imgUrl, CAPTCHA_TYPE, DECODE_TIMEOUT);
            for (int attempt = 1; isFailed(res) && attempt < MAX_DECODE_ATTEMPTS; attempt++) {
                // A failed request has no decode to dispute, so nothing is reported here.
                System.out.println(worker + " " + "打码失败,重试: ret = " + res.ret + "; desc=" + res.desc);
                res = dama2.decodeUrlAndGetResult(imgUrl, CAPTCHA_TYPE, DECODE_TIMEOUT);
            }
            if (!isFailed(res)) {
                System.out.println(worker + " " + "[打码结果=" + res.result + "] [id=" + res.ret + "] " + balanceResult);
            }
            return res;
        }

        /**
         * Decodes the captcha behind the given URL through Dama2.
         *
         * @param imgUrl URL of the captcha to decode
         * @return the decoded text, or null when all decode attempts failed
         */
        public static String getCode(String imgUrl) {
            return decode(imgUrl).result;
        }

    }

     测试30个账号,平均每个3秒

  • 相关阅读:
    字典操作
    集合操作
    字符编码与转码
    基于Vue的WebApp项目开发(二)
    算法之递归
    webpack学习(一)
    基于Vue的WebApp项目开发(一)
    webpack踩坑之旅
    vue2.0中的watch和计算属性computed
    vue2.0路由写法、传参和嵌套
  • 原文地址:https://www.cnblogs.com/zeze/p/6402963.html
Copyright © 2011-2022 走看看