import org.apache.http.HttpStatus; import org.apache.http.StatusLine; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClientBuilder; import javax.imageio.ImageIO; import java.awt.*; import java.awt.image.BufferedImage; import java.io.File; import java.io.FileOutputStream; import java.io.InputStream; import java.net.URL; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; /** * temp * train * result */ public class BaoJianHui { private static Map<BufferedImage, String> trainMap = null; private static int index = 0; public static final String dirPath = "D:\Proli\pic\One\"; public static boolean isBlack(int colorInt) { Color color = new Color(colorInt); return color.getRed() + color.getGreen() + color.getBlue() <= 100; } public static boolean isWhite(int colorInt) { Color color = new Color(colorInt); return color.getRed() + color.getGreen() + color.getBlue() > 100; } private static boolean isRemove(int rgb) { Color color = new Color(rgb); return color.getRed() == 244 || color.getRed() == 255 || color.getRed() == 241 || color.getRed() == 251 || color.getRed() == 247 || color.getRed() == 253; } /** * 获得二值化图像 * 最大类间方差法 * * @param gray * @param width * @param height */ private static int getOstu(int[][] gray, int width, int height) { int grayLevel = 256; int[] pixelNum = new int[grayLevel]; //计算所有色阶的直方图 for (int x = 0; x < width; x++) { for (int y = 0; y < height; y++) { int color = gray[x][y]; pixelNum[color]++; } } double sum = 0; int total = 0; for (int i = 0; i < grayLevel; i++) { sum += i * pixelNum[i]; //x*f(x)质量矩,也就是每个灰度的值乘以其点数(归一化后为概率),sum为其总和 total += pixelNum[i]; //n为图象总的点数,归一化后就是累积概率 } double sumB = 0;//前景色质量矩总和 int threshold = 0; double wF = 0;//前景色权重 double wB = 0;//背景色权重 double maxFreq = -1.0;//最大类间方差 for (int i = 0; i < grayLevel; i++) { wB += pixelNum[i]; //wB为在当前阈值背景图象的点数 if (wB == 0) { //没有分出前景后景 continue; } wF = total - wB; //wB为在当前阈值前景图象的点数 if (wF == 0) {//全是前景图像,则可以直接break break; } sumB += (double) (i * pixelNum[i]); double meanB = sumB / wB; double meanF = (sum - sumB) / wF; //freq为类间方差 double freq = (double) (wF) * (double) (wB) * (meanB - meanF) * (meanB - meanF); if (freq > maxFreq) { maxFreq = freq; threshold = i; } } return threshold; } /** * 图片预处理 灰度化、二值化、去噪 * @param picFile * @return * @throws Exception */ public static BufferedImage removeBackgroud(String picFile) throws Exception { BufferedImage img = ImageIO.read(new File(picFile)); int width = img.getWidth(); int height = img.getHeight(); double Wr = 0.299; double Wg = 0.587; double Wb = 0.114; int[][] gray = new int[width][height]; //灰度化 for (int x = 0; x < width; x++) { for (int y = 0; y < height; y++) { Color color = new Color(img.getRGB(x, y)); int rgb = (int) ((color.getRed() * Wr + color.getGreen() * Wg + color.getBlue() * Wb) / 3); gray[x][y] = rgb; } } int threshold = getOstu(gray, width, height); for (int x = 0; x < width; ++x) { for (int y = 0; y < height; ++y) { if (gray[x][y] > threshold) { img.setRGB(x, y, Color.white.getRGB()); } else { img.setRGB(x, y, Color.black.getRGB()); } } } //去噪 for (int x = 0; x < width; ++x) { for (int y = 0; y < height; ++y) { if (isBlack(img.getRGB(x, y))) { if (isAlone(img, x, y,width,height)) { img.setRGB(x, y, Color.WHITE.getRGB()); } } } } return img; } public static BufferedImage removeBackgroud(BufferedImage img) throws Exception { int width = img.getWidth(); int height = img.getHeight(); double Wr = 0.299; double Wg = 0.587; double Wb = 0.114; int[][] gray = new int[width][height]; //灰度化 for (int x = 0; x < width; x++) { for (int y = 0; y < height; y++) { Color color = new Color(img.getRGB(x, y)); int rgb = (int) ((color.getRed() * Wr + color.getGreen() * Wg + color.getBlue() * Wb) / 3); gray[x][y] = rgb; } } int ostu = getOstu(gray, width, height); for (int x = 0; x < width; ++x) { for (int y = 0; y < height; ++y) { if (gray[x][y] > ostu) { img.setRGB(x, y, new Color(0xFFFFFF).getRGB()); } else { img.setRGB(x, y, new Color(0x000000).getRGB()); } } } //去噪 for (int x = 0; x < width; ++x) { for (int y = 0; y < height; ++y) { if (isBlack(img.getRGB(x, y))) { if (isAlone(img, x, y,width,height)) { img.setRGB(x, y, Color.WHITE.getRGB()); } } } } return img; } /** * 是否单个噪点 * @param img * @param x * @param y * @param width * @param height * @return */ private static boolean isAlone(BufferedImage img, int x, int y,int width,int height) { if (x == 0 || width - x < 3 || y == 0 || height - y < 3) { return true; } try { // int a1 = img.getRGB(x - 1, y + 1); int a2 = img.getRGB(x - 1, y); // int a3 = img.getRGB(x - 1, y - 1); int a4 = img.getRGB(x, y + 1); int a5 = img.getRGB(x, y - 1); // int a6 = img.getRGB(x + 1, y + 1); int a7 = img.getRGB(x + 1, y); // int a8 = img.getRGB(x + 1, y - 1); // boolean b1 = isBlack(a1); boolean b2 = isBlack(a2); // boolean b3 = isBlack(a3); boolean b4 = isBlack(a4); boolean b5 = isBlack(a5); // boolean b6 = isBlack(a6); boolean b7 = isBlack(a7); // boolean b8 = isBlack(a8); ArrayList<Boolean> booleans = new ArrayList<Boolean>(); // booleans.add(isBlack(a1)); booleans.add(isBlack(a2)); // booleans.add(isBlack(a3)); booleans.add(isBlack(a4)); booleans.add(isBlack(a5)); // booleans.add(isBlack(a6)); booleans.add(isBlack(a7)); // booleans.add(isBlack(a8)); long count = booleans.stream().filter((a) -> a).count(); if (count < 1) { return true; } } catch (Exception e) { return false; } return false; } public static BufferedImage removeBlank(BufferedImage img) throws Exception { int width = img.getWidth(); int height = img.getHeight(); int start = 0; int end = 0; Label1: for (int y = 0; y < height; ++y) { for (int x = 0; x < width; ++x) { if (isBlack(img.getRGB(x, y))) { start = y; break Label1; } } } Label2: for (int y = height - 1; y >= 0; --y) { for (int x = 0; x < width; ++x) { if (isBlack(img.getRGB(x, y))) { end = y; break Label2; } } } return img.getSubimage(0, start, width, end - start + 1); } public static List<BufferedImage> splitImage(BufferedImage img) throws Exception { List<BufferedImage> subImgs = new ArrayList<>(); int width = img.getWidth(); int height = img.getHeight(); List<Integer> weightlist = new ArrayList<>(); for (int x = 0; x < width; ++x) { int count = 0; for (int y = 0; y < height; ++y) { if (isBlack(img.getRGB(x, y))) { count++; } } weightlist.add(count); } for (int i = 0; i < weightlist.size(); i++) { int length = 0; while (i < weightlist.size() && weightlist.get(i) > 0) { i++; length++; } if (length > 2) { subImgs.add(removeBlank(img.getSubimage(i - length, 0, length, height))); } } return subImgs; } public static Map<BufferedImage, String> loadTrainData() throws Exception { if (trainMap == null) { Map<BufferedImage, String> map = new HashMap<>(); File dir = new File(dirPath + "train"); File[] files = dir.listFiles(); for (File file : files) { map.put(ImageIO.read(file), file.getName().charAt(0) + ""); } trainMap = map; } return trainMap; } public static String getSingleCharOcr(BufferedImage img, Map<BufferedImage, String> map) { String result = "#"; int width = img.getWidth(); int height = img.getHeight(); int min = width * height; for (BufferedImage bi : map.keySet()) { int count = 0; if (Math.abs(bi.getWidth() - width) > 2) continue; int widthmin = width < bi.getWidth() ? width : bi.getWidth(); int heightmin = height < bi.getHeight() ? height : bi.getHeight(); Label1: for (int x = 0; x < widthmin; ++x) { for (int y = 0; y < heightmin; ++y) { if (isBlack(img.getRGB(x, y)) != isBlack(bi.getRGB(x, y))) { count++; if (count >= min) { break Label1; } } } } if (count < min) { min = count; result = map.get(bi); } } return result; } public static String getTextByImageFileUrl(String file) throws Exception { BufferedImage img = removeBackgroud(file); List<BufferedImage> listImg = splitImage(img); Map<BufferedImage, String> map = loadTrainData(); StringBuilder result = new StringBuilder(); for (BufferedImage bi : listImg) { result.append(getSingleCharOcr(bi, map)); } ImageIO.write(img, "JPG", new File(dirPath + "result/" + result + ".jpg")); return result.toString(); } public static String getTextByImageUrl(String url) throws Exception { BufferedImage read = ImageIO.read(new URL(url)); BufferedImage img = removeBackgroud(read); List<BufferedImage> listImg = splitImage(img); Map<BufferedImage, String> map = loadTrainData(); StringBuilder result = new StringBuilder(); for (BufferedImage bi : listImg) { result.append(getSingleCharOcr(bi, map)); } ImageIO.write(img, "JPG", new File(dirPath + "result/" + result + ".jpg")); return result.toString(); } public static void downloadImage() { CloseableHttpClient httpClient = HttpClientBuilder.create().build(); HttpGet httpGet = new HttpGet("http://iir.circ.gov.cn/web/servlet/ValidateCode"); httpGet.addHeader("Host", "game.tom.com"); httpGet.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36"); for (int i = 0; i < 30; i++) { try { CloseableHttpResponse execute = httpClient.execute(httpGet); StatusLine statusLine = execute.getStatusLine(); int statusCode = statusLine.getStatusCode(); if (statusCode != HttpStatus.SC_OK) { System.err.println("Method failed: " + statusLine); } InputStream inputStream = execute.getEntity().getContent(); FileOutputStream outputStream = new FileOutputStream(new File(dirPath + "temp/" + i + ".jpg")); byte[] buff = new byte[1024]; int len = 0; while((len = inputStream.read(buff, 0, 1024)) != -1){ outputStream.write(buff, 0, len); } inputStream.close(); outputStream.close(); // 读取内容 System.out.println(i + "OK!"); } catch (Exception e) { e.printStackTrace(); } finally { // 释放连接 httpGet.releaseConnection(); } } } /** * 训练数据 * @throws Exception */ public static void trainData() throws Exception { File dir = new File(dirPath + "temp"); File[] files = dir.listFiles(); for (File file : files) { //图片预处理 二值化、去噪 BufferedImage img = removeBackgroud(dirPath + "temp/" + file.getName()); //图片分割 List<BufferedImage> listImg = splitImage(img); if (listImg.size() == 4) { for (int j = 0; j < listImg.size(); ++j) { ImageIO.write(listImg.get(j), "JPG", new File(dirPath + "train/" + file.getName().charAt(j) + "-" + (index++) + ".jpg")); } } } } /** * @param args * @throws Exception */ public static void main(String[] args) throws Exception { // downloadImage();//下载图片-保监会 // trainData();//训练图片 // String text = getTextByImageUrl("http://iir.circ.gov.cn/web/servlet/ValidateCode?time=123");//保监会 // String text = getTextByImageUrl("");//验证码地址 String text = getTextByImageFileUrl(dirPath + "temp/XbF9.jpg"); System.out.println(text); } }