zoukankan      html  css  js  c++  java
  • java从图片中识别文字

    
    package com.dream.common;
    
    import java.awt.image.BufferedImage;
    import java.io.File;
    import java.io.IOException;
    import java.util.Iterator;
    import java.util.Locale;
    
    import javax.imageio.IIOImage;
    import javax.imageio.ImageIO;
    import javax.imageio.ImageReader;
    import javax.imageio.ImageWriteParam;
    import javax.imageio.ImageWriter;
    import javax.imageio.metadata.IIOMetadata;
    import javax.imageio.stream.FileImageInputStream;
    import javax.imageio.stream.ImageInputStream;
    import javax.imageio.stream.ImageOutputStream;
    
    import com.github.jaiimageio.plugins.tiff.TIFFImageWriteParam;
    
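    /**
     * Helper that converts an image file into a temporary TIFF copy, which is
     * then handed to the OCR step for text recognition.
     * 
     * @author zlj
     *
     */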
    public class ImageIOHelper {
    	/**
    	 * Converts an image file into an uncompressed TIFF copy stored next to the original.
    	 *
    	 * <p>The first frame of the source is decoded with the first ImageIO reader that accepts
    	 * the stream and re-encoded with the first registered "tiff" writer. The source's stream
    	 * and image metadata are passed on to the writer.
    	 *
    	 * @param imageFile source image to convert
    	 * @return the temporary TIFF file; this method does not delete it on success
    	 * @throws IOException if the source cannot be opened, no matching reader or TIFF writer
    	 *         is registered, or reading/writing fails
    	 */
    	public File createImage(File imageFile) throws IOException {
    		// A single input stream serves both reader lookup and decoding; getImageReaders
    		// restores the stream position, so no second (leaked) stream is needed.
    		ImageInputStream iis = ImageIO.createImageInputStream(imageFile);
    		if (iis == null) {
    			throw new IOException("Cannot open image input stream: " + imageFile);
    		}

    		ImageReader reader = null;
    		ImageWriter writer = null;
    		ImageOutputStream ios = null;
    		File tempFile = null;
    		boolean written = false;
    		try {
    			Iterator<ImageReader> readers = ImageIO.getImageReaders(iis);
    			if (!readers.hasNext()) {
    				throw new IOException("No image reader available for: " + imageFile);
    			}
    			reader = readers.next();
    			reader.setInput(iis);

    			Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("tiff");
    			if (!writers.hasNext()) {
    				throw new IOException("No TIFF image writer is registered with ImageIO");
    			}
    			writer = writers.next();

    			TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.CHINESE);
    			tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);

    			IIOMetadata streamMetadata = reader.getStreamMetadata();
    			BufferedImage bi = reader.read(0);
    			IIOImage image = new IIOImage(bi, null, reader.getImageMetadata(0));

    			tempFile = tempImageFile(imageFile);
    			ios = ImageIO.createImageOutputStream(tempFile);
    			if (ios == null) {
    				throw new IOException("Cannot open image output stream: " + tempFile);
    			}
    			writer.setOutput(ios);
    			writer.write(streamMetadata, image, tiffWriteParam);
    			written = true;
    			return tempFile;
    		} finally {
    			// Release everything even when decoding or encoding failed part-way.
    			try {
    				if (ios != null) {
    					ios.close();
    				}
    			} finally {
    				try {
    					iis.close();
    				} finally {
    					if (writer != null) {
    						writer.dispose();
    					}
    					if (reader != null) {
    						reader.dispose();
    					}
    					if (!written && tempFile != null) {
    						// Do not leave a truncated temp file behind on failure.
    						tempFile.delete();
    					}
    				}
    			}
    		}
    	}
    
    	/**
    	 * 添加后缀 tempfile
    	 * 
    	 * @param imageFile
    	 * @return
    	 * @throws IOException
    	 */
    	private File tempImageFile(File imageFile) throws IOException {
    		String path = imageFile.getPath();
    		StringBuffer strB = new StringBuffer(path);
    		strB.insert(path.lastIndexOf('.'), "_text_recognize_temp");
    		String s = strB.toString().replaceFirst("(?<=//.)(//w+)$", "tif");
    		Runtime.getRuntime().exec("attrib " + """ + s + """ + " +H"); // 设置文件隐藏
    		return new File(strB.toString());
    	}
    }package com.dream.common;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.util.ArrayList;
    import java.util.List;
    
    import org.jdesktop.swingx.util.OS;
    
    /**
     * 从图片中识别文字
     * @author zlj
     *
     */
    public class OCRUtil {
    	private final String LANG_OPTION = "-l"; // 英文字母小写l,并非数字1
    	private final String EOL = System.getProperty("line.separator");
    	private String tessPath = "C://Program Files (x86)//Tesseract-OCR";// ocr默认安装路径
    	private String transname = "chi_sim";// 默认中文语言包,识别中文
    
    	/**
    	 * 从图片中识别文字
    	 * @param imageFile
    	 * @param imageFormat
    	 * @return text recognized in image
    	 * @throws Exception
    	 */
    	public String recognizeText(File imageFile) throws Exception {
    		File tempImage = new ImageIOHelper().createImage(imageFile);
    		return ocrImages(tempImage, imageFile);
    	}
    	/**
    	 * 识别图片中的文字
    	 * @param tempImage
    	 * @param imageFile
    	 * @return
    	 * @throws IOException
    	 * @throws InterruptedException
    	 */
    	private String ocrImages(File tempImage, File imageFile) throws IOException, InterruptedException {
    		File outputFile = new File(imageFile.getParentFile(), "output");
    		Runtime.getRuntime().exec("attrib " + """ + outputFile.getAbsolutePath() + """ + " +H"); // 设置文件隐藏
    		StringBuffer strB = new StringBuffer();
    		List<String> cmd = new ArrayList<String>();
    		if (OS.isWindowsXP()) {
    			cmd.add(tessPath + "//tesseract");
    		} else if (OS.isLinux()) {
    			cmd.add("tesseract");
    		} else {
    			cmd.add(tessPath + "//tesseract");
    		}
    		cmd.add("");
    		cmd.add(outputFile.getName());
    		cmd.add(LANG_OPTION);
    		cmd.add(transname);
    		
    		ProcessBuilder pb = new ProcessBuilder();
    		pb.directory(imageFile.getParentFile());
    		cmd.set(1, tempImage.getName());
    		pb.command(cmd);
    		pb.redirectErrorStream(true);
    		Process process = pb.start();
    		int w = process.waitFor();
    		
    		tempImage.delete();// 删除临时正在工作文件
    		if (w == 0) {
    			BufferedReader in = new BufferedReader(
    					new InputStreamReader(new FileInputStream(outputFile.getAbsolutePath() + ".txt"), "UTF-8"));
    			String str;
    			while ((str = in.readLine()) != null) {
    				strB.append(str).append(EOL);
    			}
    			in.close();
    		} else {
    			String msg;
    			switch (w) {
    			case 1:
    				msg = "Errors accessing files.There may be spaces in your image's filename.";
    				break;
    			case 29:
    				msg = "Cannot recongnize the image or its selected region.";
    				break;
    			case 31:
    				msg = "Unsupported image format.";
    				break;
    			default:
    				msg = "Errors occurred.";
    			}
    			tempImage.delete();
    			throw new RuntimeException(msg);
    		}
    		new File(outputFile.getAbsolutePath() + ".txt").delete();
    		return strB.toString();
    	}
    	
    	public static void main(String[] args) throws Exception {
    		System.out.println("begin");
    		String path = "F://test1.png";
    		String valCode = new OCRUtil().recognizeText(new File(path));
    		System.out.println(valCode);
    		System.out.println("end");
    	}
    }
    
    
  • 相关阅读:
    大前端工具集
    Python黑魔法,一行实现并行化
    MRPT
    ./configure 交叉编译库时所最常用到的配置
    Ubuntu16.04 ARM 编译 编译器版本和unordered_map map问题
    ubuntu 16.04 ARM glog移植
    Ubuntu16.04 ARM平台移植libcurl curl-7.63.0
    ubuntu16.04 ARM平台移植xmlrpc-c1.39.12
    ubunt 14.04 Could not find CMAKE_ROOT !!! CMake has most likely not been installed correctly. Modul
    ubuntu PCL的使用
  • 原文地址:https://www.cnblogs.com/lalalagq/p/10219412.html
Copyright © 2011-2022 走看看