zoukankan      html  css  js  c++  java
  • 使用阿里云表格识别 OCR,将图片中的表格转换成 Excel

    财务同事经常需要对照别人发来的表格图片,手工录入成自己的表格。为了简化这项工作,这里借助阿里云的表格识别 OCR 接口,把图片中的表格直接转换成 Excel 文件。

    效果图如下

    效果图

    整合的代码
    package com.xai.wuye.controller.api;
    
    import com.alibaba.fastjson.JSON;
    import com.alibaba.fastjson.JSONArray;
    import com.alibaba.fastjson.JSONException;
    import com.alibaba.fastjson.JSONObject;
    import com.xai.wuye.common.JsonResult;
    import com.xai.wuye.exception.ResultException;
    import com.xai.wuye.model.AParam;
    import com.xai.wuye.service.CarService;
    import com.xai.wuye.util.HttpUtils;
    import org.apache.http.HttpResponse;
    import org.apache.http.util.EntityUtils;
    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.core.io.FileSystemResource;
    import org.springframework.http.HttpHeaders;
    import org.springframework.http.MediaType;
    import org.springframework.http.ResponseEntity;
    import org.springframework.scheduling.annotation.EnableAsync;
    import org.springframework.stereotype.Controller;
    import org.springframework.web.bind.annotation.RequestMapping;
    import org.springframework.web.bind.annotation.RequestParam;
    import org.springframework.web.bind.annotation.ResponseBody;
    import org.springframework.web.multipart.MultipartFile;

    import java.io.*;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.util.Base64;
    import java.util.Date;
    import java.util.HashMap;
    import java.util.Map;
    import java.util.logging.Level;
    import java.util.logging.Logger;
    import java.util.regex.Pattern;

    import static org.apache.tomcat.util.codec.binary.Base64.encodeBase64;
    
    @Controller
    @EnableAsync
    @RequestMapping("/api/ocr")
    public class AliOCRImages {
    
    
        @Autowired
        CarService carService;
    
    
        private String OcrPath = "/home/runApp/car/orc/";
    
    
        @ResponseBody
        @RequestMapping("table")
        public JsonResult getFirstLicence(@RequestParam(value = "file", required = false) MultipartFile file) {
            if (file == null || file.isEmpty()||file.getSize() > 1204*1204*3)
                throw new ResultException(0,"文件为null,且不能大于3M");
    
            String filename = file.getOriginalFilename();
            String filepath = OcrPath+"temp/"+filename;
            File newFile = new File(filepath);
            try {
                file.transferTo(newFile);
    
                String host = "https://form.market.alicloudapi.com";
                String path = "/api/predict/ocr_table_parse";
                
                // 输入阿里的code
                String appcode = "4926a667ee6c41329c278361*****";
                String imgFile = "图片路径";
                Boolean is_old_format = false;//如果文档的输入中含有inputs字段,设置为True, 否则设置为False
                //请根据线上文档修改configure字段
                JSONObject configObj = new JSONObject();
                configObj.put("format", "xlsx");
                configObj.put("finance", false);
                configObj.put("dir_assure", false);
                String config_str = configObj.toString();
                //            configObj.put("min_size", 5);
                //String config_str = "";
    
                String method = "POST";
                Map<String, String> headers = new HashMap<String, String>();
                //最后在header中的格式(中间是英文空格)为Authorization:APPCODE 83359fd73fe94948385f570e3c139105
                headers.put("Authorization", "APPCODE " + appcode);
    
                Map<String, String> querys = new HashMap<String, String>();
    
                // 对图像进行base64编码
                String imgBase64 = "";
                try {
    
                    byte[] content = new byte[(int) newFile.length()];
                    FileInputStream finputstream = new FileInputStream(newFile);
                    finputstream.read(content);
                    finputstream.close();
                    imgBase64 = new String(encodeBase64(content));
                } catch (IOException e) {
                    e.printStackTrace();
                    return null;
                }
                // 拼装请求body的json字符串
                JSONObject requestObj = new JSONObject();
                try {
                    if(is_old_format) {
                        JSONObject obj = new JSONObject();
                        obj.put("image", getParam(50, imgBase64));
                        if(config_str.length() > 0) {
                            obj.put("configure", getParam(50, config_str));
                        }
                        JSONArray inputArray = new JSONArray();
                        inputArray.add(obj);
                        requestObj.put("inputs", inputArray);
                    }else{
                        requestObj.put("image", imgBase64);
                        if(config_str.length() > 0) {
                            requestObj.put("configure", config_str);
                        }
                    }
                } catch (JSONException e) {
                    e.printStackTrace();
                }
                String bodys = requestObj.toString();
    
                try {
                    /**
                     * 重要提示如下:
                     * HttpUtils请从
                     * https://github.com/aliyun/api-gateway-demo-sign-java/blob/master/src/main/java/com/aliyun/api/gateway/demo/util/HttpUtils.java
                     * 下载
                     *
                     * 相应的依赖请参照
                     * https://github.com/aliyun/api-gateway-demo-sign-java/blob/master/pom.xml
                     */
                    HttpResponse response = HttpUtils.doPost(host, path, method, headers, querys, bodys);
                    int stat = response.getStatusLine().getStatusCode();
                    if(stat != 200){
                        System.out.println("Http code: " + stat);
                        System.out.println("http header error msg: "+ response.getFirstHeader("X-Ca-Error-Message"));
                        System.out.println("Http body error msg:" + EntityUtils.toString(response.getEntity()));
                        return null;
                    }
    
                    String res = EntityUtils.toString(response.getEntity());
                    JSONObject res_obj = JSON.parseObject(res);
                    Long fileName = System.currentTimeMillis();
                    if(is_old_format) {
    
    
    
                        JSONArray outputArray = res_obj.getJSONArray("outputs");
                        String output = outputArray.getJSONObject(0).getJSONObject("outputValue").getString("dataValue");
                        JSONObject out = JSON.parseObject(output);
                        System.out.println(out.toJSONString());
    
    
                    }else{
    
                        String tmp_base64path = OcrPath + fileName;
                        File tmp_base64file = new File(tmp_base64path);
                        if(!tmp_base64file.exists()){
                            tmp_base64file.getParentFile().mkdirs();
                        }
                        tmp_base64file.createNewFile();
    
                        // write
                        FileWriter fw = new FileWriter(tmp_base64file, true);
                        BufferedWriter bw = new BufferedWriter(fw);
                        bw.write(res_obj.getString("tables"));
                        bw.flush();
                        bw.close();
                        fw.close();
    
                        String exelFilePath = OcrPath + fileName + "_1.xlsx";
                        Runtime.getRuntime().exec("touch "+exelFilePath).destroy();
                        Process exec = Runtime.getRuntime().exec("sed -i -e 's/\\n/\n/g' " + tmp_base64path);
                        exec.waitFor();
                        exec.destroy();
    
                        Process exec1 = null;
                        String[] cmd = { "/bin/sh", "-c", "base64 -d " + tmp_base64path + " > " + exelFilePath };
                        exec1 = Runtime.getRuntime().exec(cmd);
                        exec1.waitFor();
                        exec1.destroy();
    
    
                        return JsonResult.success(fileName);
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                }
    
    
    
            } catch (IOException e) {
                e.printStackTrace();
            }
    
            return null;
        }
    
    
        @ResponseBody
        @RequestMapping("getId")
        public ResponseEntity<FileSystemResource> getFirstLicence(String id) {
            String exelFilePath = OcrPath + id + "_1.xlsx";
            return export(new File(exelFilePath));
        }
    
    
        public ResponseEntity<FileSystemResource> export(File file) {
            if (file == null) {
                return null;
            }
            HttpHeaders headers = new HttpHeaders();
            headers.add("Cache-Control", "no-cache, no-store, must-revalidate");
            headers.add("Content-Disposition", "attachment; filename=" + System.currentTimeMillis() + ".xls");
            headers.add("Pragma", "no-cache");
            headers.add("Expires", "0");
            headers.add("Last-Modified", new Date().toString());
            headers.add("ETag", String.valueOf(System.currentTimeMillis()));
    
            return ResponseEntity
                    .ok()
                    .headers(headers)
                    .contentLength(file.length())
                    .contentType(MediaType.parseMediaType("application/octet-stream"))
                    .body(new FileSystemResource(file));
        }
    
        public static JSONObject getParam(int type, String dataValue) {
            JSONObject obj = new JSONObject();
            try {
                obj.put("dataType", type);
                obj.put("dataValue", dataValue);
            } catch (JSONException e) {
                e.printStackTrace();
            }
            return obj;
        }
    
    }
    
    

    大功告成

    • 以下是静态页面代码
    <!DOCTYPE html>
    <html>
    <head>
      <meta charset="UTF-8">
      <!-- import CSS -->
      <link rel="stylesheet" href="https://unpkg.com/element-ui/lib/theme-chalk/index.css">
        <title>table</title>
    </head>
    <body>
      <!--
        Upload page for the table OCR endpoint: drop an image, wait for the upload to
        finish, then click the file entry to download the generated workbook.
      -->
      <div id="app">
          <el-upload
                  class="upload-demo"
                  drag
                  action="https://www.***.com/car/api/ocr/table"
                  :file-list="imagelist"
                  :on-preview="pre"
                    >
              <i class="el-icon-upload"></i>
              <div class="el-upload__text">将文件拖到此处,或<em>点击上传</em></div>
              <div class="el-upload__tip" slot="tip">只能上传jpg/png文件,且不超过3MB</div>
          </el-upload>
          <!--
            NOTE(review): nothing in this page ever adds entries to imagelist, so this
            preview list does not render. handleFileName, handleFileRemove and
            handleFileEnlarge are not defined in the Vue instance below and must be
            added before the list is put to use.
          -->
          <div class="img-content" v-for="(item,key) in imagelist" :key="key">
              <img :src="item.url">
              <div class="name">
                  <div>{{ item.name }}</div>
                  <el-button type="text" @click="handleFileName(item,key)">修改名字</el-button>
              </div>
              <!-- delete icon -->
              <div class="del">
                  <i @click="handleFileRemove(item,key)" class="el-icon-delete2"></i>
              </div>
              <!-- enlarge icon -->
              <div class="layer" @click="handleFileEnlarge(item.url)">
                  <i class="el-icon-view"></i>
              </div>
          </div>
      </div>
      <!-- import Vue before Element; Element UI needs Vue 2, so pin the major version -->
      <script src="https://unpkg.com/vue@2/dist/vue.js"></script>
      <!-- import JavaScript -->
      <script src="https://unpkg.com/element-ui/lib/index.js"></script>
      <script>
        new Vue({
          el: '#app',
          data: function() {
            return {
                visible: false,
                imagelist: [

                ]

            }
          },
            methods: {
                // Called when an uploaded file entry is clicked: opens the download
                // link for the workbook generated from that file.
                pre(file) {
                    var result = file && file.response;
                    // The response is missing until the upload has finished, and it has
                    // no data when recognition failed; do not open a broken link then.
                    if (!result || result.data === undefined || result.data === null) {
                        console.log(result && result.msg ? result.msg : 'no OCR result for this file');
                        return;
                    }
                    console.log(result.msg)
                    // NOTE(review): the upload goes to /car/api/ocr/table while this link
                    // has no /car prefix; confirm which context path is deployed.
                    window.open("https://www.***.com/api/ocr/getId?id=" + encodeURIComponent(result.data));
                }
            }

        })
      </script>
    </body>
    </html>
    
  • 相关阅读:
    MySQL安装失败,提示需安装MicroSoft Visual C++ 2013 Redistributable
    Selinium登录系统cookies的重复使用
    脚本绕开验证码,自动执行的方法
    Firebug显示停用状态
    web自动化测试中绕开验证码登陆的方式
    java使用poi包将数据写入Excel表格
    读取config配置
    定位元素的等待方法
    jxl读取Excel表格数据
    php中的魔术常量
  • 原文地址:https://www.cnblogs.com/rolandlee/p/10671544.html
Copyright © 2011-2022 走看看