zoukankan      html  css  js  c++  java
  • 利用OCR识别扫描的jpg、tif文件的文字

    第一步:下载老马哥的从 office和sharepoint 提取出来的注册表和dll  http://115.com/file/dpa4qrt2  

    或者直接安装office和sharepoint2007

    第二步:下载我的demo   http://files.cnblogs.com/0banana0/OCR.zip

    ***识别度不是百分之百的  当然需要校准啦   在 编辑器里边修改错误的东西 !

     第三步:发布

    本地环境无错,发布到 IIS 报错:“Object hasn't been initialized and can't be used yet”

    解决办法:Go to IIS -> Application Pools -> Default Application Pool -> Identity -> Custom account -> give the user name and password.

    ****发布的时候 iis还报一个错:Compiler Error Message: CS0016拒绝访问

    解决办法:给 C:\Windows\Temp 加上 NETWORK SERVICE(只加这个我的不行)和 IIS_IUSRS(后来加上这个才行)权限

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Web;
    using System.Collections;
    using System.IO;
    using System.Text;
    using ContractManage.DAL;
    using System.Threading;
    using System.Runtime.CompilerServices;
    using System.Runtime.InteropServices;
    
    namespace ContractManage.uploadify
    {
        /// <summary>
        /// HTTP handler that runs Microsoft Office Document Imaging (MODI) OCR over the
        /// scanned images stored for one contract and saves the recognized text.
        /// </summary>
        /// <remarks>
        /// The contract is selected with the <c>cid</c> query-string value. Its images are
        /// read from the <c>OCRFile\{cid}</c> folder and one <c>Pt_ContractImg</c> row is
        /// inserted per recognized image. The response body is always a single space.
        /// </remarks>
        public class OCR : IHttpHandler
        {
            // Image extensions handed to MODI; compared case-insensitively.
            private static readonly string[] SupportedExtensions = { ".jpg", ".tif", ".tiff" };

            // Upper bound, in milliseconds, on the wait for the 100% progress notification.
            private const int OcrTimeoutMilliseconds = 5000;

            // Number of trailing characters stripped from MapPath("uploadify"),
            // i.e. the length of "\uploadify".
            private const int MappedFolderSuffixLength = 10;

            // COM objects of the image currently being processed. They are fields so that
            // both the per-file cleanup and Dispose() can release them.
            private MODI.Document _document;
            private MODI.Images _images;
            private MODI.Image _image;
            private MODI.Layout _layout;

            // Signaled by _document_OnOCRProgress when MODI reports 100% progress.
            private ManualResetEvent _completedOCR = new ManualResetEvent(false);

            /// <summary>
            /// Runs OCR for the contract named by the <c>cid</c> query-string value.
            /// </summary>
            /// <param name="context">The current HTTP context.</param>
            /// <remarks>
            /// Requests whose <c>cid</c> is missing, is not made of digits only, or has no
            /// image folder are ignored; the response is the same single space either way.
            /// </remarks>
            public void ProcessRequest(HttpContext context)
            {
                context.Response.ContentType = "text/plain";

                string cid = context.Request.QueryString["cid"];

                // cid becomes part of a file-system path, so only plain digits are accepted;
                // anything else (e.g. "..\..") could escape the OCRFile folder.
                if (IsValidContractId(cid))
                {
                    // NOTE(review): assumes the handler is requested from inside the
                    // "uploadify" folder, so MapPath returns "...\uploadify\uploadify" and
                    // stripping the last 10 characters yields the handler's own folder - confirm.
                    string sPath = context.Server.MapPath("uploadify");
                    string baseDirectory = sPath.Substring(0, sPath.Length - MappedFolderSuffixLength);
                    string path = Path.Combine(Path.Combine(baseDirectory, "OCRFile"), cid);

                    // A contract without uploaded images is not an error.
                    if (Directory.Exists(path))
                    {
                        CheckFileType(path, cid);
                    }
                }

                context.Response.Write(" ");
            }

            /// <summary>
            /// Runs OCR on every supported image in a directory and saves the text of each.
            /// </summary>
            /// <param name="directoryPath">Directory whose files are scanned (not recursive).</param>
            /// <param name="cid">Contract id used to build the stored path and the saved record.</param>
            /// <remarks>
            /// Files with other extensions are skipped. An OCR failure propagates to the
            /// caller and stops the loop; a failure while saving is logged by
            /// <see cref="SaveDate"/> and does not stop it.
            /// </remarks>
            public void CheckFileType(string directoryPath, string cid)
            {
                foreach (string file in Directory.GetFiles(directoryPath))
                {
                    if (!IsSupportedImage(Path.GetExtension(file)))
                    {
                        continue;
                    }

                    string strContent = RecognizeText(file);
                    string strPath = "uploadify/OCRFile/" + cid + "/" + Path.GetFileName(file);
                    SaveDate(strPath, strContent, cid);
                }
            }

            /// <summary>
            /// Returns true when <paramref name="cid"/> is a non-empty, digits-only string
            /// that fits in an <see cref="int"/>.
            /// </summary>
            private static bool IsValidContractId(string cid)
            {
                int parsed;
                return !string.IsNullOrEmpty(cid)
                    && int.TryParse(
                        cid,
                        System.Globalization.NumberStyles.None,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out parsed);
            }

            /// <summary>
            /// Returns true when <paramref name="extension"/> (including the leading dot)
            /// is one of the supported image extensions, ignoring case.
            /// </summary>
            private static bool IsSupportedImage(string extension)
            {
                foreach (string supported in SupportedExtensions)
                {
                    if (string.Equals(extension, supported, StringComparison.OrdinalIgnoreCase))
                    {
                        return true;
                    }
                }

                return false;
            }

            /// <summary>
            /// Runs Simplified-Chinese OCR on one image file and returns the recognized text.
            /// </summary>
            /// <param name="imagePath">Full path of the image to recognize.</param>
            /// <returns>The layout text of the first image (page) in the document.</returns>
            /// <remarks>
            /// The document is closed without saving and all COM objects are released even
            /// when recognition fails; the original exception is left to propagate.
            /// </remarks>
            private string RecognizeText(string imagePath)
            {
                MODI._IDocumentEvents_OnOCRProgressEventHandler onProgress =
                    new MODI._IDocumentEvents_OnOCRProgressEventHandler(_document_OnOCRProgress);
                bool opened = false;

                // The event stays signaled after the previous file, so it must be reset or
                // the wait below would return immediately for every file but the first.
                _completedOCR.Reset();

                try
                {
                    _document = new MODI.Document();
                    _document.OnOCRProgress += onProgress;
                    _document.Create(imagePath);
                    opened = true;
                    _document.OCR(MODI.MiLANGUAGES.miLANG_CHINESE_SIMPLIFIED, true, true);

                    // Best-effort wait for the 100% notification; on timeout the text is
                    // read anyway.
                    _completedOCR.WaitOne(OcrTimeoutMilliseconds);

                    // Only the first image is read, so for a multi-page TIFF only page one
                    // is recognized.
                    _images = _document.Images;
                    _image = (MODI.Image)_images[0];
                    _layout = _image.Layout;
                    return _layout.Text;
                }
                finally
                {
                    CloseCurrentDocument(onProgress, opened);
                }
            }

            /// <summary>
            /// Detaches the progress handler, closes the current document without saving
            /// and releases every COM object held in the fields.
            /// </summary>
            /// <param name="onProgress">The handler attached to the current document.</param>
            /// <param name="opened">True when <c>Create</c> succeeded, i.e. there is something to close.</param>
            private void CloseCurrentDocument(MODI._IDocumentEvents_OnOCRProgressEventHandler onProgress, bool opened)
            {
                if (_document != null)
                {
                    try
                    {
                        _document.OnOCRProgress -= onProgress;
                        if (opened)
                        {
                            _document.Close(false);
                        }
                    }
                    catch (COMException ex)
                    {
                        // Runs inside a finally block: a cleanup failure must not replace
                        // the exception that may already be propagating.
                        System.Diagnostics.Trace.TraceError("OCR: failed to close MODI document: " + ex);
                    }
                }

                ReleaseOcrObjects();
            }

            /// <summary>
            /// MODI progress callback; signals completion once progress reaches 100.
            /// </summary>
            /// <param name="Progress">Progress value reported by MODI.</param>
            /// <param name="Cancel">Cancellation flag supplied by MODI; never modified here.</param>
            void _document_OnOCRProgress(int Progress, ref bool Cancel)
            {
                if (Progress == 100)
                {
                    _completedOCR.Set();
                }
            }

            /// <summary>
            /// Releases the COM reference behind each non-null COM object passed in.
            /// </summary>
            /// <param name="objects">Objects to release; null entries and non-COM objects are skipped.</param>
            /// <remarks>
            /// Despite the name, this cannot clear the caller's variables; callers must
            /// null their own references afterwards.
            /// </remarks>
            private static void SetComObjectToNull(params object[] objects)
            {
                foreach (object o in objects)
                {
                    // FinalReleaseComObject throws ArgumentException for non-COM objects.
                    if (o != null && Marshal.IsComObject(o))
                    {
                        Marshal.FinalReleaseComObject(o);
                    }
                }
            }

            /// <summary>
            /// Releases the MODI objects held in the fields and clears the fields so the
            /// released wrappers cannot be used or released a second time.
            /// </summary>
            private void ReleaseOcrObjects()
            {
                // Children first, the owning document last.
                SetComObjectToNull(_layout, _image, _images, _document);
                _layout = null;
                _image = null;
                _images = null;
                _document = null;
            }

            /// <summary>
            /// Releases any MODI COM objects still held by this handler. Safe to call
            /// more than once.
            /// </summary>
            /// <remarks>
            /// The objects are released explicitly, so no forced garbage collection is
            /// performed.
            /// </remarks>
            public void Dispose()
            {
                ReleaseOcrObjects();
            }

            /// <summary>
            /// Stores the recognized text of one image as a <c>Pt_ContractImg</c> record.
            /// </summary>
            /// <param name="strPath">Relative path of the image, stored with the record.</param>
            /// <param name="strContent">Recognized text.</param>
            /// <param name="cid">Contract id; must be convertible to <see cref="int"/>.</param>
            /// <remarks>
            /// Best-effort: a failure is written to the trace output and not rethrown, so
            /// one bad record does not abort the remaining images.
            /// </remarks>
            public void SaveDate(string strPath, string strContent, string cid)
            {
                try
                {
                    Pt_ContractImg img = new Pt_ContractImg();
                    img.Content = strContent;
                    img.Path = strPath;
                    img.ContractID = Convert.ToInt32(cid);
                    Pt_ContractImg_DAO.Insert(img);
                }
                catch (Exception ex)
                {
                    System.Diagnostics.Trace.TraceError(
                        "OCR: failed to save recognized text for '" + strPath + "' (contract " + cid + "): " + ex);
                }
            }

            /// <summary>
            /// Always false: the handler keeps per-request COM state in its fields, so an
            /// instance must not be shared between requests.
            /// </summary>
            public bool IsReusable
            {
                get
                {
                    return false;
                }
            }

        }
    }
  • 相关阅读:
    搭建NLP相关的python环境
    win10 系统更新后系统第三方软件无法切换微软拼音输入中文
    NLP文本清理时常用的python小函数
    python 复制
    #论文阅读# Universial language model fine-tuning for text classification
    conda 里的 jupyter
    #论文阅读#attention is all you need
    关于多类别分类任务
    大白话AOP
    搭建JavaEE项目是遇到的几个问题
  • 原文地址:https://www.cnblogs.com/0banana0/p/3184886.html
Copyright © 2011-2022 走看看