zoukankan      html  css  js  c++  java
  • SpringBoot整合openoffice实现word文档的读取和导入及报错处理

    先安装openoffice4

    Linux系统安装参考:https://www.cnblogs.com/pxblog/p/11622969.html

    Windows系统安装参考:https://www.cnblogs.com/pxblog/p/14346148.html

    引入jar包

    https://yvioo.lanzous.com/b00o97q6d
    密码:1cjp

    如果是pom文件的话

    <dependency>
                <groupId>local</groupId>
                <artifactId>jodconverter</artifactId>
                <version>2.2.2</version>
                <scope>system</scope>
                <systemPath>${project.basedir}/src/main/webapp/WEB-INF/lib/jodconverter-2.2.2.jar
                </systemPath>
            </dependency>
            <dependency>
                <groupId>local</groupId>
                <artifactId>jodconverter-cli</artifactId>
                <version>2.2.2</version>
                <scope>system</scope>
                <systemPath>${project.basedir}/src/main/webapp/WEB-INF/lib/jodconverter-cli-2.2.2.jar
                </systemPath>
            </dependency>
            <dependency>
                <groupId>local</groupId>
                <artifactId>jodconverter-core</artifactId>
                <version>3.0-beta-4</version>
                <scope>system</scope>
                <systemPath>${project.basedir}/src/main/webapp/WEB-INF/lib/jodconverter-core-3.0-beta-4.jar
                </systemPath>
            </dependency>

    然后把jar包放到项目/webapp/WEB-INF/lib/下,这位置可以根据自己的来,然后pom文件路径也做相应修改即可

    日志注解用到了

    <!--lombok插件-->
            <dependency>
                <groupId>org.projectlombok</groupId>
                <artifactId>lombok</artifactId>
                <optional>true</optional>
            </dependency> 

    application.yml

    spring:
      main:
        allow-bean-definition-overriding: true
      servlet:
        multipart:
          enabled: true #是否处理上传
          max-file-size: 50MB #允许最大的单个上传大小,单位可以是kb
          max-request-size: 50MB #允许最大请求大小
    
    #文件上传目录
    fileUploadPath: E://test//
    
    openoffice:
      officeHome: D:\openoffice4  #openoffice的安装路径
      officePort: 8002   #openoffice启动端口

    UploadUtils.java

    import org.apache.commons.lang.RandomStringUtils;
    
    
    /**
     * Helper for building randomized file names.
     */
    public class UploadUtils {

        /**
         * The 36 characters random names are drawn from: the digits 0-9 followed
         * by the lowercase letters a-z.
         */
        public static final char[] N36_CHARS =
                "0123456789abcdefghijklmnopqrstuvwxyz".toCharArray();

        /**
         * Builds a file name of the form {@code path + <8 random characters> + "." + ext}.
         *
         * @param path prefix placed in front of the random name; concatenated as-is
         * @param ext  file extension without the leading dot
         * @return the concatenated file name
         */
        public static String generateFilename(String path, String ext) {
            String randomName = RandomStringUtils.random(8, N36_CHARS);
            return path + randomName + "." + ext;
        }
    }

    启动类

    OpenOfficeConverter.java

    import com.test.UploadUtils;
    import lombok.extern.slf4j.Slf4j;
    import org.artofsolving.jodconverter.OfficeDocumentConverter;
    import org.artofsolving.jodconverter.office.DefaultOfficeManagerConfiguration;
    import org.artofsolving.jodconverter.office.OfficeManager;
    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.beans.factory.annotation.Value;
    import org.springframework.stereotype.Component;
    
    import java.io.File;
    import java.io.FileNotFoundException;
    
    
    @Component
    @Slf4j
    public class OpenOfficeConverter {
    
    
        @Value("${openoffice.officeHome}")
        public String officeHome;
    
        @Value("${openoffice.officePort}")
        public Integer officePort;
    
    
        public void startService() {
            DefaultOfficeManagerConfiguration configuration = new DefaultOfficeManagerConfiguration();
            try {
                log.info("准备启动服务....");
                //设置OpenOffice.org安装目录
                configuration.setOfficeHome(getOfficeHome());
                //设置转换端口,默认为8100
                configuration.setPortNumber(getPort());
                //设置任务执行超时为5分钟
                configuration.setTaskExecutionTimeout(1000 * 60 * 5L);
                //设置任务队列超时为24小时
                configuration.setTaskQueueTimeout(1000 * 60 * 60 * 24L);
    
                officeManager = configuration.buildOfficeManager();
                //启动服务
                officeManager.start();
                log.info("office转换服务启动成功!");
            } catch (Exception ce) {
                log.error("office转换服务启动失败!详细信息:" + ce);
            }
        }
    
        public void stopService() {
            log.info("关闭office转换服务....");
            if (officeManager != null) {
                officeManager.stop();
            }
            log.info("关闭office转换成功!");
        }
    
    
        /**
         * 转换格式
         *
         * @param inputFile 需要转换的原文件路径
         * @param fileType  要转换的目标文件类型 html,pdf
         */
        public File convert(String inputFile, String fileType) {
            String outputFile = UploadUtils.generateFilename(getFilePath(), fileType);
            if (inputFile.endsWith(".txt")) {
                String odtFile = FileUtils.getFilePrefix(inputFile) + ".odt";
                if (new File(odtFile).exists()) {
                    log.error("odt文件已存在!");
                    inputFile = odtFile;
                } else {
                    try {
                        FileUtils.copyFile(inputFile, odtFile);
                        inputFile = odtFile;
                    } catch (FileNotFoundException e) {
                        log.error("文档不存在!");
                        e.printStackTrace();
                    }
                }
            }
            OfficeDocumentConverter converter = new OfficeDocumentConverter(officeManager);
            File output = new File(outputFile);
            converter.convert(new File(inputFile), output);
            return output;
        }
    
    
        public void init() {
            OpenOfficeConverter coverter = new OpenOfficeConverter(officeHome, officePort);
            coverter.startService();
            this.openOfficeConverter = coverter;
        }
    
        public void destroy() {
            this.openOfficeConverter.stopService();
        }
    
    
        @Autowired
        private OpenOfficeConverter openOfficeConverter;
        private static OfficeManager officeManager;
        public static final String HTML = "html";
        public static final String PDF = "pdf";
        public static final String TXT = "txt";
        public static final String DOC = "doc";
        public static final String DOCX = "docx";
        public static final String XLS = "xls";
        public static final String XLSX = "xlsx";
        public static final String PPT = "ppt";
        public static final String PPTX = "pptx";
        public static final String WPS = "wps";
        private int port = 8100;
        private String filePath;
    
    
        public OpenOfficeConverter(String officeHome, int port, String filePath) {
            super();
            this.officeHome = officeHome;
            this.port = port;
            this.filePath = filePath;
        }
    
        public OpenOfficeConverter(String officeHome, int port) {
            super();
            this.officeHome = officeHome;
            this.port = port;
        }
    
        public OpenOfficeConverter() {
            super();
        }
    
    
        public String getOfficeHome() {
            return officeHome;
        }
    
    
        public int getPort() {
            return port;
        }
    
    
        public String getFilePath() {
            return filePath;
        }
    
        public void setFilePath(String filePath) {
            this.filePath = filePath;
        }
    
    }

    配置类

    OpenOfficeConfig.java

    import org.springframework.context.annotation.Bean;
    import org.springframework.context.annotation.Configuration;
    
    /**
     * Spring configuration that registers the {@link OpenOfficeConverter} bean.
     */
    @Configuration
    public class OpenOfficeConfig {

        /**
         * Creates the converter bean, naming {@code init} as its init method and
         * {@code destroy} as its destroy method.
         *
         * @return a converter built with the no-argument constructor
         */
        @Bean(destroyMethod = "destroy", initMethod = "init")
        public OpenOfficeConverter openOfficeConverter() {
            OpenOfficeConverter converter = new OpenOfficeConverter();
            return converter;
        }

    }

    文件工具类

    FileUtils.java

    import java.io.*;
    import java.util.HashSet;
    import java.util.Set;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * @author Tom
     */
    public class FileUtils {
    
        public static String getFilePrefix(String fileName) {
            int splitIndex = fileName.lastIndexOf(".");
            return fileName.substring(0, splitIndex);
        }
    
    
        public static void copyFile(String inputFile, String outputFile)
                throws FileNotFoundException {
            File sFile = new File(inputFile);
            File tFile = new File(outputFile);
            FileInputStream fis = new FileInputStream(sFile);
            FileOutputStream fos = new FileOutputStream(tFile);
            int temp = 0;
            try {
                while ((temp = fis.read()) != -1) {
                    fos.write(temp);
                }
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                try {
                    fis.close();
                    fos.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    
        public static String toHtmlString(File file) {
            // 获取HTML文件流
            StringBuffer htmlSb = new StringBuffer();
            try {
                BufferedReader br = new BufferedReader(new InputStreamReader(
                        new FileInputStream(file), "gb2312"));
                while (br.ready()) {
                    htmlSb.append(br.readLine());
                }
                br.close();
                // 删除临时文件
                file.delete();
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
            // HTML文件字符串
            String htmlStr = htmlSb.toString();
            // 返回经过清洁的html文本
            return htmlStr;
        }
        
        
        public static String subString(String html,String prefix,String subfix) {
            return html.substring(html.indexOf(prefix)+prefix.length(), html.indexOf(subfix));
        }
    
        /**
         * 清除一些不需要的html标记
         * 
         * @param htmlStr
         *            带有复杂html标记的html语句
         * @return 去除了不需要html标记的语句
         */
        public static String clearFormat(String htmlStr, String docImgPath) {
            // 获取body内容的正则
            String bodyReg = "<BODY .*</BODY>";
            Pattern bodyPattern = Pattern.compile(bodyReg);
            Matcher bodyMatcher = bodyPattern.matcher(htmlStr);
            if (bodyMatcher.find()) {
                // 获取BODY内容,并转化BODY标签为DIV
                htmlStr = bodyMatcher.group().replaceFirst("<BODY", "<DIV")
                        .replaceAll("</BODY>", "</DIV>");
            }
            // 调整图片地址
            htmlStr = htmlStr.replaceAll("<IMG SRC="", "<IMG SRC="" + docImgPath
                    + "/");
            // 把<P></P>转换成</div></div>保留样式
            // content = content.replaceAll("(<P)([^>]*>.*?)(<\/P>)",
            // "<div$2</div>");
            // 把<P></P>转换成</div></div>并删除样式
            htmlStr = htmlStr.replaceAll("(<P)([^>]*)(>.*?)(<\/P>)", "<p$3</p>");
            // 删除不需要的标签
            htmlStr = htmlStr
                    .replaceAll(
                            "<[/]?(font|FONT|span|SPAN|xml|XML|del|DEL|ins|INS|meta|META|[ovwxpOVWXP]:\w+)[^>]*?>",
                            "");
            // 删除不需要的属性
            htmlStr = htmlStr
                    .replaceAll(
                            "<([^>]*)(?:lang|LANG|class|CLASS|style|STYLE|size|SIZE|face|FACE|[ovwxpOVWXP]:\w+)=(?:'[^']*'|""[^""]*""|[^>]+)([^>]*)>",
                            "<$1$2>");
            return htmlStr;
        }
    }

    控制器使用类

    OpenOfficeController.java

    import org.json.JSONObject;
    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.beans.factory.annotation.Value;
    import org.springframework.util.FileCopyUtils;
    import org.springframework.web.bind.annotation.RequestMapping;
    import org.springframework.web.bind.annotation.RequestMethod;
    import org.springframework.web.bind.annotation.RequestParam;
    import org.springframework.web.bind.annotation.RestController;
    import org.springframework.web.multipart.MultipartFile;
    
    import java.io.File;
    import java.io.FileOutputStream;
    import java.text.DateFormat;
    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.UUID;
    
    /**
     * @author yvioo
     */
    @RestController
    public class OpenOfficeController {
    
        public static final DateFormat YEAR_MONTH_FORMAT = new SimpleDateFormat(
                "yyyyMM");
    
    
        @Autowired
        private OpenOfficeConverter openOfficeConverter;
    
    
        @Value("${fileUploadPath}")
        private String fileUploadPath ;
    
    
        @RequestMapping(value = "/o_docUpload", method = RequestMethod.POST)
        public String docUpload(@RequestParam(value = "Filedata", required = false) MultipartFile file) {
            JSONObject data = new JSONObject();
            String origName=file.getOriginalFilename();
            // TODO 检查允许上传的后缀
    
            //先把文件上传到服务器
            String extName = file.getOriginalFilename().substring(file.getOriginalFilename().lastIndexOf("."));
            String fileName = UUID.randomUUID().toString() + extName;
            //文件所在绝对路径 上传路径和文件名
            String path = fileUploadPath + fileName;
            File toFile=new File(path);
            if (!toFile.getParentFile().exists()){
                //文件夹不存在,先创建文件夹
                toFile.getParentFile().mkdirs();
            }
            try {
                //进行文件复制上传
                FileCopyUtils.copy(file.getInputStream(), new FileOutputStream(toFile));
            } catch (Exception e) {
                //上传失败
                e.printStackTrace();
            }
    
            //这个是word文档图片存放的路径
            String docImgPath=fileUploadPath+generateMonthname()+"/";
            openOfficeConverter.setFilePath(docImgPath);
            path = path.replace("\", "/");
            try {
                File outFile = openOfficeConverter.convert(path, OpenOfficeConverter.HTML);
                String html = FileUtils.toHtmlString(outFile);
                String txt = FileUtils.clearFormat(FileUtils.subString(html, "<HTML>", "</HTML>"), docImgPath);
                System.out.println(txt);
                data.put("status", 0);
                data.put("txt", txt);
                data.put("title", origName);
                return  data.toString();
            } catch (Exception e) {
                e.printStackTrace();
                data.put("status", 1);
            }
            return "";
        }
    
    
        /**
         * 根据月份生成文件夹名称
         * @return
         */
        public static String generateMonthname() {
            return YEAR_MONTH_FORMAT.format(new Date());
        }
    }

    如果idea启动报错了 

    Description:
    
    The bean 'openOfficeConverter', defined in class path resource [com/web/openoffice/OpenOfficeConfig.class], could not be registered. A bean with that name has already been defined in file [D:\admin\target\classes\com\web\openoffice\OpenOfficeConverter.class] and overriding is disabled.
    
    Action:
    
    Consider renaming one of the beans or enabling overriding by setting spring.main.allow-bean-definition-overriding=true
    
    Disconnected from the target VM, address: '127.0.0.1:50132', transport: 'socket'
    
    Process finished with exit code 0

    就在application.properties 配置里面增加(如果使用的是application.yml,则对应写法为 spring.main.allow-bean-definition-overriding: true)

    spring.main.allow-bean-definition-overriding=true

    如果报错

    Caused by: java.lang.ClassNotFoundException: com.sun.star.lang.XEventListener
        at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
        at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
        ... 60 common frames omitted

    加入maven

     <!-- https://mvnrepository.com/artifact/org.openoffice/ridl -->
            <dependency>
                <groupId>org.openoffice</groupId>
                <artifactId>ridl</artifactId>
                <version>2.2.1</version>
            </dependency>

    报错

    Caused by: java.util.concurrent.ExecutionException: java.lang.NoClassDefFoundError: com/sun/star/comp/helper/Bootstrap
        at java.util.concurrent.FutureTask.report(FutureTask.java:122)
        at java.util.concurrent.FutureTask.get(FutureTask.java:192)
        at org.artofsolving.jodconverter.office.ManagedOfficeProcess.startAndWait(ManagedOfficeProcess.java:62)
        ... 45 more

    引入maven

    <!-- https://mvnrepository.com/artifact/org.openoffice/juh -->
    <dependency>
        <groupId>org.openoffice</groupId>
        <artifactId>juh</artifactId>
        <version>2.2.1</version>
    </dependency>

    报错

    Caused by: java.lang.NoClassDefFoundError: com/sun/star/frame/XComponentLoader
        at org.artofsolving.jodconverter.AbstractConversionTask.loadDocument(AbstractConversionTask.java:86)
        at org.artofsolving.jodconverter.AbstractConversionTask.execute(AbstractConversionTask.java:59)
        at org.artofsolving.jodconverter.office.PooledOfficeManager$2.run(PooledOfficeManager.java:80)
        at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
        at java.util.concurrent.FutureTask.run$$$capture(FutureTask.java:266)
        at java.util.concurrent.FutureTask.run(FutureTask.java)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        ... 1 more

    引入

     <!-- https://mvnrepository.com/artifact/org.openoffice/unoil -->
            <dependency>
                <groupId>org.openoffice</groupId>
                <artifactId>unoil</artifactId>
                <version>2.2.1</version>
            </dependency>
    -----------------------有任何问题可以在评论区评论,也可以私信我,我看到的话会进行回复,欢迎大家指教------------------------ (蓝奏云官网有些地址失效了,需要把请求地址lanzous改成lanzoux才可以)
  • 相关阅读:
    使用Jenkins自动编译 .net 项目
    Windows平台下Git服务器搭建
    在MAC上搭建cordova3.4.0的IOS和android开发环境
    检索 COM 类工厂中 CLSID 为 {820280E0-8ADA-4582-A1D9-960A83CE8BB5} 的组件失败,原因是出现以下错误: 80040154 没有注册类 (异常来自 HRESULT:0x80040154 (REGDB_E_CLASSNOTREG))。
    IIS7 404 模块 IIS Web Core 通知 MapRequestHandler 处理程序 StaticFile 错误代码 0x80070002
    mac 端口被占用及kill端口
    查询数据库表大小sql
    开启关闭keditor 过滤
    sql修改字段名称
    Android客户端性能优化
  • 原文地址:https://www.cnblogs.com/pxblog/p/14345975.html
Copyright © 2011-2022 走看看