zoukankan      html  css  js  c++  java
  • csharp:Optical Character Recognition

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Data;
    using System.Drawing;
    using System.IO;
    using System.Drawing.Imaging;
    using MODI;//Microsoft Office Document Imaging
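    // Prerequisite: install the MODI component from the Office installation media. It is not installed by
    // default with Office; add it as an extra feature in Office setup. Installation instructions: http://support.microsoft.com/kb/982760
    // COM reference to add: Microsoft Office Document Imaging 12.0 Type Library (Office 2007)
    // or Microsoft Office Document Imaging 11.0 Type Library (Office 2003)
    // Simplified Chinese OCR engine: http://www.microsoft.com/downloads/thankyou.aspx?familyId=dd172063-9517-41d8-82af-29c38f7437b6&displayLang=zh-hk 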
    
    namespace ToText
    {   
        /// <summary>
        /// Optical character recognition (OCR) helpers built on the
        /// Microsoft Office Document Imaging (MODI) COM library.
        /// Author: Geovin Du (涂聚文), 2014-05-07.
        /// </summary>
        public static class OCRGetstring
        {
            /// <summary>
            /// Builds the list of OCR languages offered by this class.
            /// </summary>
            /// <returns>
            /// A table with the columns <c>ID</c> (int), <c>LanguageName</c> (string) and
            /// <c>LanguageLCID</c> (string). The <c>LanguageLCID</c> values are the keys
            /// accepted by the <c>language</c> parameter of <see cref="ExtractText"/> and
            /// <see cref="getFileToOCR"/>.
            /// </returns>
            public static DataTable getLanguageList()
            {
                DataTable dt = new DataTable();
                dt.Columns.Add("ID", typeof(int));
                dt.Columns.Add("LanguageName", typeof(string));
                dt.Columns.Add("LanguageLCID", typeof(string));

                // Every LanguageLCID below has a matching case in GetLanuageType;
                // keep the two lists in sync when adding a language.
                dt.Rows.Add(1, "简体中文", "2052");
                dt.Rows.Add(2, "繁体中文", "1028");
                dt.Rows.Add(3, "英语", "9");
                dt.Rows.Add(4, "捷克语", "5");
                dt.Rows.Add(5, "丹麦语", "6");
                dt.Rows.Add(6, "德语", "7");
                dt.Rows.Add(7, "希腊语", "8");
                dt.Rows.Add(8, "西班牙语", "10");
                dt.Rows.Add(9, "芬兰语", "11");
                dt.Rows.Add(10, "法语", "12");
                dt.Rows.Add(11, "匈牙利语", "14");
                dt.Rows.Add(12, "意大利语", "16");
                dt.Rows.Add(13, "日语", "17");
                dt.Rows.Add(14, "韩语", "18");
                dt.Rows.Add(15, "荷兰语", "19");
                dt.Rows.Add(16, "挪威语", "20");
                dt.Rows.Add(17, "波兰语", "21");
                dt.Rows.Add(18, "葡萄牙语", "22");
                dt.Rows.Add(19, "俄语", "25");
                dt.Rows.Add(20, "瑞典语", "29");
                dt.Rows.Add(21, "土耳其语", "31");
                return dt;
            }

            /// <summary>
            /// Maps a language key (the <c>LanguageLCID</c> strings produced by
            /// <see cref="getLanguageList"/>) to the corresponding MODI language constant.
            /// </summary>
            /// <param name="sValue">Language key such as "9" or "2052".</param>
            /// <returns>
            /// The matching <see cref="MODI.MiLANGUAGES"/> value; English when the key is
            /// null or not one of the known keys.
            /// </returns>
            private static MODI.MiLANGUAGES GetLanuageType(string sValue)
            {
                switch (sValue)
                {
                    case "2052":
                        return MODI.MiLANGUAGES.miLANG_CHINESE_SIMPLIFIED;
                    case "5":
                        return MODI.MiLANGUAGES.miLANG_CZECH;
                    case "6":
                        return MODI.MiLANGUAGES.miLANG_DANISH;
                    case "7":
                        return MODI.MiLANGUAGES.miLANG_GERMAN;
                    case "8":
                        return MODI.MiLANGUAGES.miLANG_GREEK;
                    case "9":
                        return MODI.MiLANGUAGES.miLANG_ENGLISH;
                    case "10":
                        return MODI.MiLANGUAGES.miLANG_SPANISH;
                    case "11":
                        return MODI.MiLANGUAGES.miLANG_FINNISH;
                    case "12":
                        return MODI.MiLANGUAGES.miLANG_FRENCH;
                    case "14":
                        return MODI.MiLANGUAGES.miLANG_HUNGARIAN;
                    case "16":
                        return MODI.MiLANGUAGES.miLANG_ITALIAN;
                    case "17":
                        return MODI.MiLANGUAGES.miLANG_JAPANESE;
                    case "18":
                        return MODI.MiLANGUAGES.miLANG_KOREAN;
                    case "19":
                        return MODI.MiLANGUAGES.miLANG_DUTCH;
                    case "20":
                        return MODI.MiLANGUAGES.miLANG_NORWEGIAN;
                    case "21":
                        return MODI.MiLANGUAGES.miLANG_POLISH;
                    case "22":
                        return MODI.MiLANGUAGES.miLANG_PORTUGUESE;
                    case "25":
                        return MODI.MiLANGUAGES.miLANG_RUSSIAN;
                    case "29":
                        return MODI.MiLANGUAGES.miLANG_SWEDISH;
                    case "31":
                        return MODI.MiLANGUAGES.miLANG_TURKISH;
                    case "1028":
                        return MODI.MiLANGUAGES.miLANG_CHINESE_TRADITIONAL;
                    default:
                        // Unknown or null key: fall back to English.
                        return MODI.MiLANGUAGES.miLANG_ENGLISH;
                }
            }

            /// <summary>
            /// Closes a MODI document without saving and releases its COM wrapper.
            /// Intended for cleanup paths, so a failure to close is deliberately ignored.
            /// </summary>
            /// <param name="doc">The document to close; null is ignored.</param>
            private static void CloseDocument(MODI.Document doc)
            {
                if (doc == null)
                {
                    return;
                }

                try
                {
                    doc.Close(false);
                }
                catch (System.Runtime.InteropServices.COMException)
                {
                    // Best effort: the document may never have been opened successfully,
                    // and a close failure must not mask the exception that led here.
                }
                finally
                {
                    if (System.Runtime.InteropServices.Marshal.IsComObject(doc))
                    {
                        System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
                    }
                }
            }

            /// <summary>
            /// Runs OCR on an in-memory image and returns the recognised text.
            /// </summary>
            /// <remarks>
            /// The image is drawn at its original size onto a canvas of at least 1024x768
            /// pixels, written to a temporary BMP file, and handed to MODI. Both temporary
            /// files are deleted before the method returns or throws.
            /// </remarks>
            /// <param name="image">The image to recognise.</param>
            /// <param name="language">
            /// Language key as listed in the <c>LanguageLCID</c> column of
            /// <see cref="getLanguageList"/>; unknown keys fall back to English.
            /// </param>
            /// <returns>The text of the first page; an empty string if MODI reports no text.</returns>
            /// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
            public static string ExtractText(this System.Drawing.Image image,string language)
            {
                if (image == null)
                {
                    throw new ArgumentNullException("image");
                }

                // GetTempFileName creates an empty placeholder file; the bitmap itself is
                // written next to it with a ".bmp" suffix, so both must be removed.
                string tmpFile = Path.GetTempFileName();
                string bmpFile = tmpFile + ".bmp";
                MODI.Document doc = null;
                try
                {
                    // NOTE(review): the padding area of the canvas is left at the Bitmap
                    // default (not cleared to white) - confirm this is fine for MODI.
                    using (Bitmap canvas = new Bitmap(Math.Max(image.Width, 1024), Math.Max(image.Height, 768)))
                    {
                        using (Graphics gfx = Graphics.FromImage(canvas))
                        {
                            gfx.DrawImage(image, new Rectangle(0, 0, image.Width, image.Height));
                        }

                        canvas.Save(bmpFile, ImageFormat.Bmp);
                    }

                    doc = new MODI.Document();
                    doc.Create(bmpFile);
                    doc.OCR(GetLanuageType(language), true, true);

                    MODI.Image page = (MODI.Image)doc.Images[0];
                    return page.Layout.Text ?? string.Empty;
                }
                finally
                {
                    // Close the document first so it no longer holds the BMP file
                    // when the temporary files are deleted.
                    CloseDocument(doc);
                    File.Delete(tmpFile);
                    File.Delete(bmpFile);
                }
            }

            /// <summary>
            /// Runs OCR on an image file and returns the recognised text of all its pages.
            /// </summary>
            /// <param name="fileToOCR">Path of the image file to recognise.</param>
            /// <param name="language">
            /// Language key as listed in the <c>LanguageLCID</c> column of
            /// <see cref="getLanguageList"/>; unknown keys fall back to English.
            /// </param>
            /// <returns>
            /// The concatenated text of every page, or an empty string when the file does
            /// not exist.
            /// </returns>
            public static string getFileToOCR(string fileToOCR, string language)
            {
                if (!File.Exists(fileToOCR))
                {
                    return string.Empty;
                }

                StringBuilder sb = new StringBuilder();
                MODI.Document md = new MODI.Document();
                try
                {
                    md.Create(fileToOCR);
                    md.OCR(GetLanuageType(language), true, true);

                    // Fetch the page collection once instead of on every iteration.
                    var pages = md.Images;
                    for (int i = 0; i < pages.Count; i++)
                    {
                        MODI.Image page = (MODI.Image)pages[i];
                        sb.Append(page.Layout.Text);
                    }
                }
                finally
                {
                    // Always close, even when Create or OCR throws, so the document
                    // does not stay open on the source file.
                    CloseDocument(md);
                }

                return sb.ToString();
            }
        }
    }
    
  • 相关阅读:
    IDEA最常用快捷键汇总+快速写出Main函数
    设计模式之代理模式
    Java多线程中join、yield、sleep方法详解
    git基础命令详解
    用友网络科技Java高级开发面试题(2019)
    Java内部类超详细总结(含代码示例)
    构造器中绝对绝对不能调用可被覆盖的方法
    写给小白看的Git的安装配置与使用
    Vue学习笔记5--前端工程化
    Vue学习笔记4--vue-router路由
  • 原文地址:https://www.cnblogs.com/geovindu/p/3715294.html
Copyright © 2011-2022 走看看