zoukankan      html  css  js  c++  java
  • Python 实现的 OCR 接口

    太累了,有时间再补解释。

    import pytesseract
    import requests
    from PIL import Image
    from PIL import ImageFilter
    from StringIO import StringIO
    from werkzeug.utils import secure_filename
    from gevent import monkey
    from gevent.pywsgi import WSGIServer
    monkey.patch_all()
    from flask import Flask,render_template,jsonify,request,send_from_directory
    import time
    import os
    import base64
    import random
    
    
    # Upload settings: the folder name (joined onto basedir by the upload
    # handler) and the file extensions that allowed_file() accepts.
    UPLOAD_FOLDER = 'upload'
    ALLOWED_EXTENSIONS = set(('png', 'jpg', 'JPG', 'PNG'))
    # Absolute path of the directory that contains this script.
    basedir = os.path.abspath(os.path.dirname(__file__))
    # Flask application object; the upload folder name is mirrored into its config.
    app = Flask(__name__)
    app.config.update(UPLOAD_FOLDER=UPLOAD_FOLDER)
    
    def allowed_file(filename):
        """Return True if *filename* has an extension listed in ALLOWED_EXTENSIONS.

        The extension is the text after the last dot. Besides the exact
        spelling, the lower-cased form is also checked, so mixed-case
        extensions such as "Photo.Jpg" are accepted just like "photo.jpg"
        and "PHOTO.JPG". A name without any dot is rejected.
        """
        if '.' not in filename:
            return False
        ext = filename.rsplit('.', 1)[1]
        # The whitelist only spells out all-lower and all-upper variants;
        # trying the lower-cased form as well catches mixed case without
        # rejecting anything that was accepted before.
        return ext in ALLOWED_EXTENSIONS or ext.lower() in ALLOWED_EXTENSIONS
    
    @app.route('/', methods=['GET'], strict_slashes=False)
    def indexpage():
        """Render the index.html template for GET requests to the root URL."""
        page = render_template('index.html')
        return page
    
    @app.route('/',methods=['POST'],strict_slashes=False)
    def api_upload():
        """Store an uploaded image, run Tesseract OCR on it and return JSON.

        Form fields read from the request:
            file: the image to recognise; its name must pass allowed_file().
            lang: optional. "chi_sim" requests Simplified Chinese recognition;
                any other value (or none) uses pytesseract's default language.

        Returns:
            A JSON response. On success ``errno`` is 0, ``data`` holds the
            result of image_to_string and ``info`` the result of
            image_to_boxes. When the ``file`` part is missing or its extension
            is not allowed, ``errno`` is 1001.
        """
        file_dir = os.path.join(basedir, app.config['UPLOAD_FOLDER'])
        # Create the upload directory without a check-then-create race:
        # concurrent requests may both find it missing.
        try:
            os.makedirs(file_dir)
        except OSError:
            if not os.path.isdir(file_dir):
                raise

        # Dump the request headers to stdout and to error.log. Mode "w+" is
        # kept from the original, so the file only holds the latest request.
        # The with-block guarantees the handle is closed even on errors.
        headers_text = str(request.headers)
        print(headers_text)
        with open("error.log", "w+") as log:
            log.write(headers_text + "\n")

        # .get() instead of ['file'] so a request without the file part is
        # answered with the errno 1001 JSON below rather than an HTTP 400.
        f = request.files.get('file')
        postLang = request.form.get("lang", type=str)

        if not (f and allowed_file(f.filename)):
            return jsonify({"errno":1001, "errmsg":u"failed"})

        # Take the extension from the raw name that allowed_file() just
        # validated. secure_filename() can strip everything before the dot
        # (e.g. a name made only of non-ASCII characters), which would make
        # rsplit('.', 1)[1] raise IndexError.
        ext = f.filename.rsplit('.', 1)[1]
        unix_time = int(time.time())
        new_filename = str(random.randrange(0, 10001, 2)) + str(unix_time) + '.' + ext
        saved_path = os.path.join(file_dir, new_filename)
        f.save(saved_path)

        # Pass lang only when Chinese was actually requested. The original
        # used cmp(), which returns 0 (falsy) on equality and therefore
        # selected the wrong branch.
        ocr_kwargs = {}
        if postLang == "chi_sim":
            ocr_kwargs["lang"] = "chi_sim"
            print("Chinese")

        # Read back the file that was just saved (not a hard-coded directory),
        # open it once for both OCR calls and always release it afterwards.
        image = Image.open(saved_path)
        try:
            strboxs = pytesseract.image_to_boxes(image, **ocr_kwargs)
            strdata = pytesseract.image_to_string(image, **ocr_kwargs)
        finally:
            image.close()

        return jsonify({"errno":0, "msg":"succeed ","data":strdata,"info":strboxs})
    
    if __name__ == '__main__':
        # Run the Flask app under gevent's WSGIServer bound to ('', 80);
        # serve_forever() blocks until the process is stopped.
        WSGIServer(('', 80), app).serve_forever()
  • 相关阅读:
    设置开发环境
    安装开发软件
    学习路线
    预备知识
    Spring是什么
    yum安装nginx
    .net 哈希
    Excel文件处理Demo
    汉字处理组件
    Log4Net
  • 原文地址:https://www.cnblogs.com/HadesBlog/p/11564346.html
Copyright © 2011-2022 走看看