zoukankan      html  css  js  c++  java
  • java 验证码识别 base64 、URL、filepath

    
    

    import com.sun.org.apache.xml.internal.security.exceptions.Base64DecodingException;
    import com.sun.org.apache.xml.internal.security.utils.Base64;
    import com.sun.xml.internal.messaging.saaj.util.ByteInputStream;
    import org.apache.http.HttpStatus;
    import org.apache.http.StatusLine;
    import org.apache.http.client.methods.CloseableHttpResponse;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.impl.client.CloseableHttpClient;
    import org.apache.http.impl.client.HttpClientBuilder;

    import javax.imageio.ImageIO;
    import java.awt.*;
    import java.awt.image.BufferedImage;
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.net.URL;
    import java.util.*;
    import java.util.List;


    public class OCRUtil {

    // Cache of training glyph image -> the character it stands for; stays null until loadTrainData() fills it.
    private static Map<BufferedImage, String> trainMap = null;
    // Counter appended to the file names written by trainData(), so each glyph file written in one run gets a distinct name.
    private static int index = 0;


    // Prefix prepended to the "train", "temp" and "tmp" folder names used in this class.
    // The commented-out line below is an example of an absolute Windows base directory.
    //private static final String dirPath = "D:\Proli\pic\One\";
    private static final String dirPath = "";

    /**
     * Tells whether a packed RGB value counts as "black" for this recognizer.
     *
     * @param colorInt packed pixel as returned by {@code BufferedImage.getRGB}; the alpha byte is ignored
     * @return {@code true} when red + green + blue is at most 100
     */
    private static boolean isBlack(int colorInt) {
        int red = (colorInt >> 16) & 0xFF;
        int green = (colorInt >> 8) & 0xFF;
        int blue = colorInt & 0xFF;
        return red + green + blue <= 100;
    }

    /**
     * Computes a global binarization threshold with Otsu's method (maximum between-class variance).
     * It returns the threshold only; the caller applies it to the image.
     *
     * @param gray   gray levels indexed as {@code gray[x][y]}; every value read must lie in 0..255
     * @param width  number of entries to scan along the first index
     * @param height number of entries to scan along the second index
     * @return the gray level {@code t} for which splitting the pixels into {@code <= t} and
     *         {@code > t} gives the largest between-class variance; 0 when no split exists
     *         (no pixels, or all pixels share one gray level)
     */
    private static int getOstu(int[][] gray, int width, int height) {
        final int grayLevel = 256;
        int[] pixelNum = new int[grayLevel];
        // Histogram of all gray levels.
        for (int x = 0; x < width; x++) {
            for (int y = 0; y < height; y++) {
                pixelNum[gray[x][y]]++;
            }
        }

        // sum = first moment of the histogram (level * count), total = number of pixels.
        // The product is formed in double: the int product i * pixelNum[i] overflows once a
        // level holds more than about 8.4 million pixels.
        double sum = 0;
        long total = 0;
        for (int i = 0; i < grayLevel; i++) {
            sum += (double) i * pixelNum[i];
            total += pixelNum[i];
        }

        double sumB = 0;        // first moment of the class at or below the candidate threshold
        double wB = 0;          // pixel count of the class at or below the candidate threshold
        double wF;              // pixel count of the class above the candidate threshold
        int threshold = 0;
        double maxFreq = -1.0;  // largest between-class variance seen so far

        for (int i = 0; i < grayLevel; i++) {
            wB += pixelNum[i];
            if (wB == 0) {
                // Nothing at or below this level yet, so there is no split to evaluate.
                continue;
            }

            wF = total - wB;
            if (wF == 0) {
                // Every pixel is already in the lower class; higher levels cannot split further.
                break;
            }

            sumB += (double) i * pixelNum[i];
            double meanB = sumB / wB;
            double meanF = (sum - sumB) / wF;
            double diff = meanB - meanF;
            // Between-class variance, up to the constant factor 1 / total^2.
            double freq = wF * wB * diff * diff;
            if (freq > maxFreq) {
                maxFreq = freq;
                threshold = i;
            }
        }

        return threshold;
    }

    /**
     * Pre-processes a captcha image in place: gray-scale conversion, binarization with the
     * threshold from {@code getOstu}, then removal of isolated black pixels.
     *
     * <p>NOTE(review): the weighted gray value is additionally divided by 3, although the three
     * weights already sum to 1, so the values handed to {@code getOstu} only span about 0..85.
     * This is kept as is because existing training glyphs were produced with it; confirm
     * before changing.
     *
     * @param img image to clean; its pixels are overwritten
     * @return the same {@code img} instance, now containing only black and white pixels
     * @throws Exception declared for callers; nothing in this method throws a checked exception itself
     */
    private static BufferedImage removeBackgroud(BufferedImage img) throws Exception {
        final int width = img.getWidth();
        final int height = img.getHeight();
        final int white = Color.WHITE.getRGB();
        final int black = Color.BLACK.getRGB();

        // Step 1: gray-scale conversion.
        int[][] gray = new int[width][height];
        for (int x = 0; x < width; x++) {
            for (int y = 0; y < height; y++) {
                Color pixel = new Color(img.getRGB(x, y));
                double weighted = pixel.getRed() * 0.299 + pixel.getGreen() * 0.587 + pixel.getBlue() * 0.114;
                gray[x][y] = (int) (weighted / 3);
            }
        }

        // Step 2: binarize; levels above the threshold become white, all others black.
        final int threshold = getOstu(gray, width, height);
        for (int x = 0; x < width; x++) {
            for (int y = 0; y < height; y++) {
                img.setRGB(x, y, gray[x][y] > threshold ? white : black);
            }
        }

        // Step 3: de-noise. The image is modified while it is scanned, so the scan order matters.
        for (int x = 0; x < width; x++) {
            for (int y = 0; y < height; y++) {
                if (isBlack(img.getRGB(x, y)) && isAlone(img, x, y, width, height)) {
                    img.setRGB(x, y, white);
                }
            }
        }
        return img;
    }

    /**
     * Decides whether the pixel at ({@code x}, {@code y}) should be treated as isolated noise.
     * All eight surrounding pixels are inspected; the pixel is "alone" when at most one of
     * them is black.
     *
     * <p>Pixels in the first row/column and in the last two rows/columns are always reported
     * as alone. NOTE(review): the asymmetric border width looks like an off-by-one, but it is
     * kept because it affects every image processed so far; confirm the intent.
     *
     * @param img    binarized image to read from
     * @param x      column of the pixel under test
     * @param y      row of the pixel under test
     * @param width  image width used for the border test
     * @param height image height used for the border test
     * @return {@code true} for border pixels and for pixels with at most one black neighbour;
     *         {@code false} otherwise, including when reading a neighbour fails
     */
    private static boolean isAlone(BufferedImage img, int x, int y, int width, int height) {
        boolean onBorder = x == 0 || width - x < 3 || y == 0 || height - y < 3;
        if (onBorder) {
            return true;
        }
        try {
            int blackNeighbours = 0;
            for (int dx = -1; dx <= 1; dx++) {
                for (int dy = -1; dy <= 1; dy++) {
                    if (dx == 0 && dy == 0) {
                        continue; // skip the pixel itself
                    }
                    if (isBlack(img.getRGB(x + dx, y + dy))) {
                        blackNeighbours++;
                    }
                }
            }
            return blackNeighbours <= 1;
        } catch (Exception e) {
            // A neighbour could not be read (e.g. width/height do not match img): keep the pixel.
            return false;
        }
    }


    /**
     * Crops away the rows above the first and below the last row that contains a black pixel.
     *
     * @param img binarized glyph image
     * @return a sub-image of {@code img} with the full width, spanning the first to the last
     *         row containing black; if no pixel is black, the single top row
     * @throws Exception declared for callers; nothing in this method throws a checked exception itself
     */
    private static BufferedImage removeBlank(BufferedImage img) throws Exception {
        final int width = img.getWidth();
        final int height = img.getHeight();

        // First row (from the top) containing a black pixel; 0 if there is none.
        int top = 0;
        boolean topFound = false;
        for (int y = 0; y < height && !topFound; y++) {
            for (int x = 0; x < width; x++) {
                if (isBlack(img.getRGB(x, y))) {
                    top = y;
                    topFound = true;
                    break;
                }
            }
        }

        // Last row (from the bottom) containing a black pixel; 0 if there is none.
        int bottom = 0;
        boolean bottomFound = false;
        for (int y = height - 1; y >= 0 && !bottomFound; y--) {
            for (int x = 0; x < width; x++) {
                if (isBlack(img.getRGB(x, y))) {
                    bottom = y;
                    bottomFound = true;
                    break;
                }
            }
        }

        return img.getSubimage(0, top, width, bottom - top + 1);
    }

    /**
     * Splits a binarized captcha into glyph images using the vertical projection: every
     * maximal run of adjacent columns that contain black pixels is one candidate glyph.
     * Runs of one or two columns are dropped; the remaining ones are cropped vertically
     * with {@code removeBlank}.
     *
     * @param img binarized captcha image
     * @return the glyph images in left-to-right order (possibly empty)
     * @throws Exception propagated from {@code removeBlank}
     */
    private static List<BufferedImage> splitImage(BufferedImage img) throws Exception {
        final int width = img.getWidth();
        final int height = img.getHeight();

        // Number of black pixels in each column.
        int[] blackPerColumn = new int[width];
        for (int x = 0; x < width; x++) {
            for (int y = 0; y < height; y++) {
                if (isBlack(img.getRGB(x, y))) {
                    blackPerColumn[x]++;
                }
            }
        }

        List<BufferedImage> glyphs = new ArrayList<>();
        int column = 0;
        while (column < width) {
            if (blackPerColumn[column] == 0) {
                column++;
                continue;
            }
            int runStart = column;
            while (column < width && blackPerColumn[column] > 0) {
                column++;
            }
            int runLength = column - runStart;
            if (runLength > 2) {
                glyphs.add(removeBlank(img.getSubimage(runStart, 0, runLength, height)));
            }
        }
        return glyphs;
    }

    /**
     * Returns the training glyphs, loading them from the {@code train} folder on first use.
     * Each readable image file becomes one entry whose value is the first character of the
     * file name. Sub-directories and files that are not decodable images are skipped.
     *
     * <p>Not synchronized: concurrent first calls may each load the folder.
     *
     * @return map from training glyph image to the character it represents
     * @throws IOException if the training folder does not exist or cannot be listed,
     *                     or if reading an image file fails
     * @throws Exception   kept in the signature for existing callers
     */
    private static Map<BufferedImage, String> loadTrainData() throws Exception {
        if (trainMap == null) {
            File dir = new File(dirPath + "train");
            File[] files = dir.listFiles();
            if (files == null) {
                // listFiles() returns null (not an empty array) for a missing or unreadable directory.
                throw new IOException("Training directory not found or not readable: " + dir.getAbsolutePath());
            }
            Map<BufferedImage, String> map = new HashMap<>();
            for (File file : files) {
                if (!file.isFile()) {
                    continue;
                }
                BufferedImage glyph = ImageIO.read(file);
                if (glyph == null) {
                    // ImageIO.read returns null when no reader understands the file; a null key
                    // would later break the matching loop.
                    continue;
                }
                map.put(glyph, file.getName().charAt(0) + "");
            }
            // Publish only after a complete load so a failed attempt is retried next time.
            trainMap = map;
        }
        return trainMap;
    }

    /**
     * Finds the training glyph that differs from {@code img} in the fewest pixels and
     * returns its character.
     *
     * <p>Templates whose width differs from the glyph's by more than two pixels are skipped.
     * Only the overlapping top-left region of glyph and template is compared, and a pixel
     * counts as different when exactly one of the two is black.
     *
     * @param img binarized glyph to recognize
     * @param map training glyph image to character
     * @return the character of the best template, or {@code "#"} when no template differs in
     *         fewer pixels than the glyph's total pixel count
     */
    private static String getSingleCharOcr(BufferedImage img,
                                           Map<BufferedImage, String> map) {
        final int glyphWidth = img.getWidth();
        final int glyphHeight = img.getHeight();

        String bestChar = "#";
        int bestDiff = glyphWidth * glyphHeight;

        for (Map.Entry<BufferedImage, String> entry : map.entrySet()) {
            BufferedImage template = entry.getKey();
            if (Math.abs(template.getWidth() - glyphWidth) > 2) {
                continue;
            }
            int compareWidth = Math.min(glyphWidth, template.getWidth());
            int compareHeight = Math.min(glyphHeight, template.getHeight());

            int diff = 0;
            scan:
            for (int x = 0; x < compareWidth; x++) {
                for (int y = 0; y < compareHeight; y++) {
                    if (isBlack(img.getRGB(x, y)) != isBlack(template.getRGB(x, y))) {
                        diff++;
                        if (diff >= bestDiff) {
                            // Already no better than the current best; stop comparing.
                            break scan;
                        }
                    }
                }
            }

            if (diff < bestDiff) {
                bestDiff = diff;
                bestChar = entry.getValue();
            }
            if (diff == 0 && bestDiff == 0) {
                // Exact match; no other template can do better.
                break;
            }
        }
        return bestChar;
    }

    /**
     * Runs the whole recognition pipeline on one captcha image: binarize and de-noise,
     * split into glyphs, then match every glyph against the training set.
     *
     * @param read decoded captcha image; it is modified in place by the pre-processing step
     * @return the recognized characters in left-to-right order; a glyph without an
     *         acceptable template contributes {@code "#"}
     * @throws IllegalArgumentException if {@code read} is {@code null}, which is what
     *                                  {@code ImageIO.read} returns for undecodable input
     * @throws Exception                propagated from pre-processing, splitting or loading
     *                                  the training data
     */
    private static String getTextByBufferedImage(BufferedImage read) throws Exception {
        if (read == null) {
            // Fail here with a clear message instead of a NullPointerException inside the pre-processing.
            throw new IllegalArgumentException("Captcha image could not be decoded (image is null)");
        }

        // Binarize and de-noise.
        BufferedImage img = removeBackgroud(read);
        // Cut into single glyphs.
        List<BufferedImage> listImg = splitImage(img);
        // Load (or reuse) the training glyphs.
        Map<BufferedImage, String> map = loadTrainData();

        // Match glyph by glyph.
        StringBuilder result = new StringBuilder();
        for (BufferedImage bi : listImg) {
            result.append(getSingleCharOcr(bi, map));
        }
        return result.toString();
    }


    /**
     * Recognizes the captcha stored in an image file.
     *
     * @param fileStr path of the image file, including the file name
     * @return the recognized text
     * @throws Exception if the file cannot be read or recognition fails
     */
    public static String getTextByFilePath(String fileStr) throws Exception {
        BufferedImage captcha = ImageIO.read(new File(fileStr));
        return getTextByBufferedImage(captcha);
    }

    /**
     * Recognizes the captcha served at an image URL.
     *
     * @param urlStr address of the captcha image
     * @return the recognized text
     * @throws Exception if the URL is malformed, the image cannot be fetched or recognition fails
     */
    public static String getTextByImageUrl(String urlStr) throws Exception {
        BufferedImage captcha = ImageIO.read(new URL(urlStr));
        return getTextByBufferedImage(captcha);
    }

    /**
     * Recognizes the captcha contained in a Base64-encoded image.
     *
     * <p>NOTE(review): {@code Base64} and {@code ByteInputStream} here are the JDK-internal
     * {@code com.sun.*} classes imported by this file; they are presumably unavailable on
     * newer JDKs. Confirm the target JDK before relying on this method.
     *
     * @param base64Text Base64 text of the image bytes
     * @return the recognized text
     * @throws Exception if the text cannot be decoded or recognition fails
     */
    public static String getTextByBase64(String base64Text) throws Exception {
        byte[] imageBytes = Base64.decode(base64Text);
        ByteInputStream imageStream = new ByteInputStream(imageBytes, imageBytes.length);
        return getTextByBufferedImage(ImageIO.read(imageStream));
    }


    /**
     * Downloads 30 sample captcha images into the {@code temp} folder as {@code 0.jpg} to
     * {@code 29.jpg}.
     *
     * <p>Failures are reported on the console and do not stop the remaining downloads.
     * A response with a status other than 200 is reported and not saved. The HTTP client,
     * every response and both streams are closed even when a download fails.
     */
    public static void downloadImage() {
        HttpGet httpGet = new HttpGet("http://iir.circ.gov.cn/web/servlet/ValidateCode");
        // NOTE(review): this Host header does not match the request URL; kept as in the original, confirm it is intended.
        httpGet.addHeader("Host", "game.tom.com");
        httpGet.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36");

        try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) {
            for (int i = 0; i < 30; i++) {
                try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
                    StatusLine statusLine = response.getStatusLine();
                    if (statusLine.getStatusCode() != HttpStatus.SC_OK) {
                        System.err.println("Method failed: " + statusLine);
                        // Do not store an error page as a .jpg sample.
                        continue;
                    }
                    if (response.getEntity() == null) {
                        System.err.println("Method failed: empty response body for image " + i);
                        continue;
                    }
                    try (InputStream inputStream = response.getEntity().getContent();
                         FileOutputStream outputStream = new FileOutputStream(new File(dirPath + "temp/" + i + ".jpg"))) {
                        byte[] buff = new byte[1024];
                        int len;
                        while ((len = inputStream.read(buff, 0, buff.length)) != -1) {
                            outputStream.write(buff, 0, len);
                        }
                    }
                    System.out.println(i + "OK!");
                } catch (Exception e) {
                    e.printStackTrace();
                } finally {
                    // Reset the request object so it can be executed again in the next round.
                    httpGet.releaseConnection();
                }
            }
        } catch (IOException e) {
            // Raised only when closing the client fails.
            e.printStackTrace();
        }
    }

    /**
     * Builds the training set: every image in the {@code temp} folder is pre-processed and
     * split, and if exactly four glyphs are found each one is written to the {@code train}
     * folder as {@code <char>-<counter>.jpg}, where {@code <char>} is the character at the
     * glyph's position in the sample's file name.
     *
     * <p>Skipped: sub-directories, files that are not decodable images, samples that do not
     * split into four glyphs, and samples whose file name (without extension) is shorter than
     * four characters. The last case would otherwise label glyphs with parts of the extension.
     *
     * @throws IOException if the {@code temp} folder does not exist or cannot be listed,
     *                     or if reading or writing an image fails
     * @throws Exception   propagated from pre-processing or splitting
     */
    public static void trainData() throws Exception {
        File dir = new File(dirPath + "temp");
        File[] files = dir.listFiles();
        if (files == null) {
            // listFiles() returns null (not an empty array) for a missing or unreadable directory.
            throw new IOException("Sample directory not found or not readable: " + dir.getAbsolutePath());
        }
        for (File file : files) {
            if (!file.isFile()) {
                continue;
            }
            BufferedImage sample = ImageIO.read(file);
            if (sample == null) {
                // Not an image that ImageIO can decode.
                continue;
            }
            // Pre-process (binarize, de-noise) and split into glyphs.
            List<BufferedImage> listImg = splitImage(removeBackgroud(sample));
            if (listImg.size() != 4) {
                continue;
            }
            String fileName = file.getName();
            int dot = fileName.lastIndexOf('.');
            String label = dot > 0 ? fileName.substring(0, dot) : fileName;
            if (label.length() < listImg.size()) {
                System.err.println("Skipped " + fileName + ": file name must start with the captcha text");
                continue;
            }
            for (int j = 0; j < listImg.size(); ++j) {
                ImageIO.write(listImg.get(j), "JPG", new File(dirPath + "train/" + label.charAt(j) + "-" + (index++) + ".jpg"));
            }
        }
    }

    /**
     * Decodes a Base64-encoded image and stores it in the {@code tmp} folder as a JPG whose
     * name is a random UUID without dashes.
     *
     * @param base64Str Base64 text of the image bytes
     * @throws Base64DecodingException if the text is not valid Base64
     * @throws IOException             if reading the decoded image or writing the file fails
     */
    public static void writeImgByBase64(String base64Str) throws Base64DecodingException, IOException {
        String randomName = UUID.randomUUID().toString().replaceAll("-", "");
        // The two-argument overload appends ".jpg" and writes below "tmp/".
        writeImgByBase64(base64Str, randomName);
    }
    /**
     * Decodes a Base64-encoded image and stores it as {@code tmp/<fileName>.jpg}.
     *
     * <p>NOTE(review): {@code fileName} is concatenated into the path unchanged; callers
     * passing untrusted text should validate it first.
     *
     * @param base64Str Base64 text of the image bytes
     * @param fileName  file name without extension
     * @throws Base64DecodingException  if the text is not valid Base64
     * @throws IllegalArgumentException if the decoded bytes are not an image ImageIO can read
     * @throws IOException              if the file cannot be written, including the case
     *                                  where no JPG writer accepts the image
     */
    public static void writeImgByBase64(String base64Str, String fileName) throws Base64DecodingException, IOException {
        byte[] decode = Base64.decode(base64Str);
        BufferedImage read = ImageIO.read(new ByteInputStream(decode, decode.length));
        if (read == null) {
            // Same exception type ImageIO.write raises for a null image, with a clearer message.
            throw new IllegalArgumentException("Base64 text does not contain a decodable image");
        }

        File target = new File(dirPath + "tmp/" + fileName + ".jpg");
        // ImageIO.write reports "no suitable writer" by returning false, not by throwing.
        if (!ImageIO.write(read, "JPG", target)) {
            throw new IOException("No JPG writer accepted the image; nothing usable written to " + target);
        }
    }

    /**
     * Demo entry point: recognizes an embedded Base64 sample captcha, prints the result and
     * stores the sample image in the {@code tmp} folder under the recognized text.
     *
     * @param args not used
     * @throws Exception if decoding, recognition or writing the image fails
     */
    public static void main(String[] args) throws Exception {

        // One-time preparation, run by hand when needed:
        //   downloadImage();   // download sample captchas (CIRC site)
        //   trainData();       // cut the labelled samples into training glyphs
        //
        // Alternative inputs:
        //   getTextByImageUrl("http://iir.circ.gov.cn/web/servlet/ValidateCode?time=123");   // CIRC
        //   getTextByImageUrl("http://chexian.axatp.com/getAdditionNo.do?type=policy");      // policy lookup
        //   getTextByFilePath(dirPath + "temp/5xY5.jpg");
        final String sampleCaptcha = "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAYEBQYFBAYGBQYHBwYIChAKCgkJChQODwwQFxQYGBcU " +
                "FhYaHSUfGhsjHBYWICwgIyYnKSopGR8tMC0oMCUoKSj/2wBDAQcHBwoIChMKChMoGhYaKCgoKCgo " +
                "KCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCj/wAARCAAeAFoDASIA " +
                "AhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQA " +
                "AAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3 " +
                "ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWm " +
                "p6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEA " +
                "AwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSEx " +
                "BhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElK " +
                "U1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3 " +
                "uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD0HTNP " +
                "s3020Z7S3ZmhQkmNSSdo9qnl0nTpVCy6faOoYMA0KkZBBB6dQQCPcVl3VjpWu+C7Wz1xA2m3cNuG " +
                "SZ2hLElCinlWBL7Rjg549q8Q1/wxpE/xM0/w38NT/Zt9Bua+1G3kuHazwGDjzPMIPB2kbRhtql/m " +
                "YKAfQ39m2P8Az5W3/fpf8KP7Nsf+fK2/79L/AIVarz/4reNLDwvLothrFh9q03VpWS5kMzIIo0aP " +
                "cWVVJkXD8p0YAqchiKAO2/s2x/58rb/v0v8AhR/Ztj/z5W3/AH6X/CuBuPGvii4u9Ok0/wADarHp " +
                "txdLCkt1cLC53AqTPEI5HjjDZbdx91TnB2n0igDNS1sZbto4LTT5I4spORtLxSYRlUqF7q2eSCPl " +
                "4IbIsf2bY/8APlbf9+l/wry3x94k1Xw18SNNh1i+R/CWoqJYomggzFNDtYIrN0JkWP53IVRKcFSu " +
                "9ew+Hev6h4nsdQ1S5FqNLkvJY9MaGNlaW3RiokfcxOSRjG1SNpOORgA17zTNLjiu57+K0Fj5OJUn " +
                "ijEKKNxZiSvQg85JGFHA5zO2n232hAun2RgKsWcqAwbI2gLtwQRuycjGBwc8TfaPLn8u6aCLzZfL " +
                "th5uWl+TceCBhuH4GflXOeoHg/gDw1H8WdL8ReIfFk3n6lNK1lZFd6pY7Y9wKKHAZQZF+U/3SSSW " +
                "JoA9yXSNPW4eYWkO91VCCuVwCSML0B+Y5IGTxnOBjgdTVU1K7VFCqszgADAA3GpPgf4rn174fRXG " +
                "t3Gbi0ujYNczyDMx+TZk4HzHzFTuWIzkk03Vv+Qre/8AXZ//AEI0AaHjfXtV0bwIP+Eb0++vNZeG " +
                "COHyLN5lj3g5c4GDgI3rhim4YYZ4X4da5ZeDfDl0mvab4t0+8vmafUdcu9LwkcrDA+c7mIDHC7lb " +
                "LMSQAxA9OsfE1nBZW8LxXBaONUJCrjIGPWnN4nszcJIPtoRVZTEETaxJGGPfIwQMED5jkHjAB0Es " +
                "bO8LLM8YRtzKoXEg2kbWyCcZIPGDlRzjIPlfxm0S61jxf8P/ACtMnv7GK+b7XttzLGkZkgz5nBAU " +
                "gN14wDXUWPiNI90ksEUU0t1JJOYYM+bH8yxZO4Yk2CHcTkfKQBjBElx4g09LGWOys5lcMZ0jRvIV " +
                "5d3mfMyHIDPy3Bzk5DZIIB0kTfaJVnjknWNPMiMTR7AzBgNx3Lu42nBB2kNnkbSLFc//AMJXY/8A " +
                "PK5/75X/ABo/4Sux/wCeVz/3yv8AjQB5n8druXxVZy6Botq8n9kK+q6jdSZRLdY0lURkEZ3vyV6Z " +
                "G1hlSWHcfB/xHF4m8BadcIiRz2qizuI40CqskYA4AVVAKlWwowN2O1Ry634Z0mVr+PRliuJrqMvL " +
                "DaxLI0sjGMOTkEn962T1wzepyeHbrQtB+2HT7Frb7VL5jx20flw8cLti3lVbaFDFQNxGSOgAB3Fe " +
                "H+HH1z4WxeJNCt/D2q6rbyStd6Rc2lq1yjFlKgTspXGNkeQAD949Cpr0yXxPZu8LL9tjCNuZVRMS " +
                "DaRtbOTjJB4wcqOcZBk/4Sux/wCeVz/3yv8AjQBj/BzwnP4O8Ew2V/xf3ErXVygcOsbsAAoIHZVX " +
                "PX5t2CRis3Vv+Qre/wDXZ/8A0I11X/CV2P8Azyuf++V/xrkb6VZ724mQELJIzgHrgnNAH//Z";

        final String recognized = getTextByBase64(sampleCaptcha);
        System.out.println(recognized);

        // Store the sample image, named after the recognized text.
        writeImgByBase64(sampleCaptcha, recognized);
    }
    }

    1. 新建3个文件夹指向代码中的文件地址

     

    2. 识别结果保存在代码中的 tmp 文件夹(原文称 result):writeImgByBase64 以识别出的文本作为文件名写入图片

    3. temp 中的验证码图片需手动重命名为其真实内容(如 5xY5.jpg),因为 trainData() 取文件名的前 4 个字符作为各字符的标签

    4. train 文件夹保存训练后切分出的单个字符图片,识别时用来匹配查找

    执行后结果

    trainData();//训练图片

  • 相关阅读:
    Android实现不同Active页面间的跳转
    Android Dialog的整个生命周期
    fragment的基本用法
    使用URLEncoder、URLDecoder进行URL参数的转码与解码
    Android 通过URL获取网络资源
    Dialog向Activity传递数据
    Android 自定义AlertDialog(退出提示框)
    javascript的继承实现
    UVA Graph Coloring
    poj3744高速功率矩阵+可能性DP
  • 原文地址:https://www.cnblogs.com/proli/p/8043010.html
Copyright © 2011-2022 走看看