先安装openoffice4
Linux系统安装参考:https://www.cnblogs.com/pxblog/p/11622969.html
Windows系统安装参考:https://www.cnblogs.com/pxblog/p/14346148.html
引入jar包
https://yvioo.lanzous.com/b00o97q6d
密码:1cjp
如果是pom文件的话
<dependency> <groupId>local</groupId> <artifactId>jodconverter</artifactId> <version>2.2.2</version> <scope>system</scope> <systemPath>${project.basedir}/src/main/webapp/WEB-INF/lib/jodconverter-2.2.2.jar </systemPath> </dependency> <dependency> <groupId>local</groupId> <artifactId>jodconverter-cli</artifactId> <version>2.2.2</version> <scope>system</scope> <systemPath>${project.basedir}/src/main/webapp/WEB-INF/lib/jodconverter-cli-2.2.2.jar </systemPath> </dependency> <dependency> <groupId>local</groupId> <artifactId>jodconverter-core</artifactId> <version>3.0-beta-4</version> <scope>system</scope> <systemPath>${project.basedir}/src/main/webapp/WEB-INF/lib/jodconverter-core-3.0-beta-4.jar </systemPath> </dependency>
然后把jar包放到项目/webapp/WEB-INF/lib/下,这位置可以根据自己的来,然后pom文件路径也做相应修改即可
日志注解用到了
<!--lombok插件--> <dependency> <groupId>org.projectlombok</groupId> <artifactId>lombok</artifactId> <optional>true</optional> </dependency>
application.yml
spring:
  main:
    allow-bean-definition-overriding: true
  servlet:
    multipart:
      enabled: true #是否处理上传
      max-file-size: 50MB #允许最大的单个上传大小,单位可以是kb
      max-request-size: 50MB #允许最大请求大小
#文件上传目录
fileUploadPath: E://test//
openoffice:
  officeHome: D:/openoffice4 #openoffice的安装路径
  officePort: 8002 #openoffice启动端口
UploadUtils.java
import org.apache.commons.lang.RandomStringUtils;

/**
 * Helper for building randomized file names.
 */
public class UploadUtils {

    /**
     * The 36 characters used for random names: digits 0-9 followed by
     * lowercase letters a-z.
     */
    public static final char[] N36_CHARS =
            "0123456789abcdefghijklmnopqrstuvwxyz".toCharArray();

    /**
     * Builds a file name of the form {@code path + <8 random chars> + "." + ext}.
     *
     * @param path prefix placed in front of the random part
     * @param ext  extension appended after the dot
     * @return the concatenated file name
     */
    public static String generateFilename(String path, String ext) {
        String randomPart = RandomStringUtils.random(8, N36_CHARS);
        // StringBuilder.append(String) renders null as "null", exactly like
        // string concatenation does.
        StringBuilder name = new StringBuilder();
        name.append(path);
        name.append(randomPart);
        name.append(".");
        name.append(ext);
        return name.toString();
    }
}
OpenOffice转换服务类(负责启动、停止转换服务并执行转换)
OpenOfficeConverter.java
import com.test.UploadUtils; import lombok.extern.slf4j.Slf4j; import org.artofsolving.jodconverter.OfficeDocumentConverter; import org.artofsolving.jodconverter.office.DefaultOfficeManagerConfiguration; import org.artofsolving.jodconverter.office.OfficeManager; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; import java.io.File; import java.io.FileNotFoundException; @Component @Slf4j public class OpenOfficeConverter { @Value("${openoffice.officeHome}") public String officeHome; @Value("${openoffice.officePort}") public Integer officePort; public void startService() { DefaultOfficeManagerConfiguration configuration = new DefaultOfficeManagerConfiguration(); try { log.info("准备启动服务...."); //设置OpenOffice.org安装目录 configuration.setOfficeHome(getOfficeHome()); //设置转换端口,默认为8100 configuration.setPortNumber(getPort()); //设置任务执行超时为5分钟 configuration.setTaskExecutionTimeout(1000 * 60 * 5L); //设置任务队列超时为24小时 configuration.setTaskQueueTimeout(1000 * 60 * 60 * 24L); officeManager = configuration.buildOfficeManager(); //启动服务 officeManager.start(); log.info("office转换服务启动成功!"); } catch (Exception ce) { log.error("office转换服务启动失败!详细信息:" + ce); } } public void stopService() { log.info("关闭office转换服务...."); if (officeManager != null) { officeManager.stop(); } log.info("关闭office转换成功!"); } /** * 转换格式 * * @param inputFile 需要转换的原文件路径 * @param fileType 要转换的目标文件类型 html,pdf */ public File convert(String inputFile, String fileType) { String outputFile = UploadUtils.generateFilename(getFilePath(), fileType); if (inputFile.endsWith(".txt")) { String odtFile = FileUtils.getFilePrefix(inputFile) + ".odt"; if (new File(odtFile).exists()) { log.error("odt文件已存在!"); inputFile = odtFile; } else { try { FileUtils.copyFile(inputFile, odtFile); inputFile = odtFile; } catch (FileNotFoundException e) { log.error("文档不存在!"); e.printStackTrace(); } } } OfficeDocumentConverter converter = new 
OfficeDocumentConverter(officeManager); File output = new File(outputFile); converter.convert(new File(inputFile), output); return output; } public void init() { OpenOfficeConverter coverter = new OpenOfficeConverter(officeHome, officePort); coverter.startService(); this.openOfficeConverter = coverter; } public void destroy() { this.openOfficeConverter.stopService(); } @Autowired private OpenOfficeConverter openOfficeConverter; private static OfficeManager officeManager; public static final String HTML = "html"; public static final String PDF = "pdf"; public static final String TXT = "txt"; public static final String DOC = "doc"; public static final String DOCX = "docx"; public static final String XLS = "xls"; public static final String XLSX = "xlsx"; public static final String PPT = "ppt"; public static final String PPTX = "pptx"; public static final String WPS = "wps"; private int port = 8100; private String filePath; public OpenOfficeConverter(String officeHome, int port, String filePath) { super(); this.officeHome = officeHome; this.port = port; this.filePath = filePath; } public OpenOfficeConverter(String officeHome, int port) { super(); this.officeHome = officeHome; this.port = port; } public OpenOfficeConverter() { super(); } public String getOfficeHome() { return officeHome; } public int getPort() { return port; } public String getFilePath() { return filePath; } public void setFilePath(String filePath) { this.filePath = filePath; } }
配置类
OpenOfficeConfig.java
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Spring configuration that registers the {@link OpenOfficeConverter} bean.
 */
@Configuration
public class OpenOfficeConfig {

    /**
     * Creates the converter bean. Spring is told to call the bean's
     * {@code init} method after creation and its {@code destroy} method on
     * shutdown.
     *
     * @return a new converter instance
     */
    @Bean(initMethod = "init", destroyMethod = "destroy")
    public OpenOfficeConverter openOfficeConverter() {
        final OpenOfficeConverter converter = new OpenOfficeConverter();
        return converter;
    }
}
文件工具类
FileUtils.java
import java.io.*;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * File-copy and HTML clean-up helpers.
 *
 * @author Tom
 */
public class FileUtils {

    /** Matches a BODY element (opening tag with attributes) up to its closing tag. */
    private static final Pattern BODY_PATTERN = Pattern.compile("<BODY .*</BODY>");

    /** Matches a P element: opening tag, attributes, content, closing tag. */
    private static final Pattern PARAGRAPH_PATTERN =
            Pattern.compile("(<P)([^>]*)(>.*?)(<\\/P>)");

    /** Matches opening/closing tags that are removed entirely. */
    private static final Pattern UNWANTED_TAG_PATTERN = Pattern.compile(
            "<[/]?(font|FONT|span|SPAN|xml|XML|del|DEL|ins|INS|meta|META|[ovwxpOVWXP]:\\w+)[^>]*?>");

    /**
     * Matches a tag containing one of the attributes that are removed.
     * NOTE(review): the doubled quotes in the middle alternative are restored
     * character-for-character from the original text; they look like they
     * were meant to be a single quote each - confirm before changing.
     */
    private static final Pattern UNWANTED_ATTRIBUTE_PATTERN = Pattern.compile(
            "<([^>]*)(?:lang|LANG|class|CLASS|style|STYLE|size|SIZE|face|FACE|[ovwxpOVWXP]:\\w+)=(?:'[^']*'|\"\"[^\"\"]*\"\"|[^>]+)([^>]*)>");

    /**
     * Returns the part of {@code fileName} before its last dot.
     *
     * @param fileName file name or path
     * @return the name without its last extension, or {@code fileName}
     *         unchanged when it contains no dot
     */
    public static String getFilePrefix(String fileName) {
        int splitIndex = fileName.lastIndexOf(".");
        if (splitIndex < 0) {
            // No extension: nothing to strip (previously this threw).
            return fileName;
        }
        return fileName.substring(0, splitIndex);
    }

    /**
     * Copies {@code inputFile} to {@code outputFile}.
     *
     * <p>Other I/O errors during the copy are printed and not rethrown, so
     * the target may be incomplete in that case.
     *
     * @param inputFile  path of the source file
     * @param outputFile path of the target file
     * @throws FileNotFoundException if either file cannot be opened
     */
    public static void copyFile(String inputFile, String outputFile) throws FileNotFoundException {
        // try-with-resources closes both streams even when opening the second
        // one or closing the first one fails.
        try (FileInputStream in = new FileInputStream(new File(inputFile));
             FileOutputStream out = new FileOutputStream(new File(outputFile))) {
            byte[] buffer = new byte[8192];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        } catch (FileNotFoundException e) {
            throw e;
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads {@code file} as gb2312 text, joins its lines without separators,
     * and deletes the file after a successful read.
     *
     * <p>I/O errors are printed and not rethrown; whatever was read up to
     * that point is returned.
     *
     * @param file the HTML file to read
     * @return the file content with line breaks removed
     */
    public static String toHtmlString(File file) {
        StringBuilder htmlSb = new StringBuilder();
        try {
            try (InputStream in = new FileInputStream(file);
                 BufferedReader br = new BufferedReader(new InputStreamReader(in, "gb2312"))) {
                String line;
                // readLine() returning null is the end-of-stream signal;
                // ready() only reports whether a read would block.
                while ((line = br.readLine()) != null) {
                    htmlSb.append(line);
                }
            }
            // Delete the temporary file once the reader is closed.
            file.delete();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return htmlSb.toString();
    }

    /**
     * Returns the text between the first occurrence of {@code prefix} and the
     * first occurrence of {@code subfix}.
     *
     * @param html   text to cut
     * @param prefix marker in front of the wanted text
     * @param subfix marker behind the wanted text
     * @return the text between the two markers
     */
    public static String subString(String html, String prefix, String subfix) {
        int begin = html.indexOf(prefix) + prefix.length();
        int end = html.indexOf(subfix);
        return html.substring(begin, end);
    }

    /**
     * Removes unwanted HTML markup.
     *
     * @param htmlStr    HTML carrying the markup to clean
     * @param docImgPath path put in front of every image source
     * @return the cleaned HTML
     */
    public static String clearFormat(String htmlStr, String docImgPath) {
        Matcher bodyMatcher = BODY_PATTERN.matcher(htmlStr);
        if (bodyMatcher.find()) {
            // Keep only the BODY element and turn it into a DIV.
            htmlStr = bodyMatcher.group()
                    .replaceFirst("<BODY", "<DIV")
                    .replace("</BODY>", "</DIV>");
        }

        // Prefix image sources. A literal replace keeps '\' and '$' in
        // docImgPath (e.g. Windows paths) from being read as regex
        // replacement syntax.
        htmlStr = htmlStr.replace("<IMG SRC=\"", "<IMG SRC=\"" + docImgPath + "/");

        // Rewrite <P ...>...</P> to <p>...</p>, dropping the attributes.
        htmlStr = PARAGRAPH_PATTERN.matcher(htmlStr).replaceAll("<p$3</p>");

        // Remove unwanted tags.
        htmlStr = UNWANTED_TAG_PATTERN.matcher(htmlStr).replaceAll("");

        // Remove unwanted attributes.
        htmlStr = UNWANTED_ATTRIBUTE_PATTERN.matcher(htmlStr).replaceAll("<$1$2>");

        return htmlStr;
    }
}
控制器使用类
OpenOfficeController.java
import org.json.JSONObject; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.util.FileCopyUtils; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestMethod; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; import org.springframework.web.multipart.MultipartFile; import java.io.File; import java.io.FileOutputStream; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Date; import java.util.UUID; /** * @author yvioo */ @RestController public class OpenOfficeController { public static final DateFormat YEAR_MONTH_FORMAT = new SimpleDateFormat( "yyyyMM"); @Autowired private OpenOfficeConverter openOfficeConverter; @Value("${fileUploadPath}") private String fileUploadPath ; @RequestMapping(value = "/o_docUpload", method = RequestMethod.POST) public String docUpload(@RequestParam(value = "Filedata", required = false) MultipartFile file) { JSONObject data = new JSONObject(); String origName=file.getOriginalFilename(); // TODO 检查允许上传的后缀 //先把文件上传到服务器 String extName = file.getOriginalFilename().substring(file.getOriginalFilename().lastIndexOf(".")); String fileName = UUID.randomUUID().toString() + extName; //文件所在绝对路径 上传路径和文件名 String path = fileUploadPath + fileName; File toFile=new File(path); if (!toFile.getParentFile().exists()){ //文件夹不存在,先创建文件夹 toFile.getParentFile().mkdirs(); } try { //进行文件复制上传 FileCopyUtils.copy(file.getInputStream(), new FileOutputStream(toFile)); } catch (Exception e) { //上传失败 e.printStackTrace(); } //这个是word文档图片存放的路径 String docImgPath=fileUploadPath+generateMonthname()+"/"; openOfficeConverter.setFilePath(docImgPath); path = path.replace("\", "/"); try { File outFile = openOfficeConverter.convert(path, OpenOfficeConverter.HTML); String html = FileUtils.toHtmlString(outFile); String txt = 
FileUtils.clearFormat(FileUtils.subString(html, "<HTML>", "</HTML>"), docImgPath); System.out.println(txt); data.put("status", 0); data.put("txt", txt); data.put("title", origName); return data.toString(); } catch (Exception e) { e.printStackTrace(); data.put("status", 1); } return ""; } /** * 根据月份生成文件夹名称 * @return */ public static String generateMonthname() { return YEAR_MONTH_FORMAT.format(new Date()); } }
如果idea启动报错了
Description:
The bean 'openOfficeConverter', defined in class path resource [com/web/openoffice/OpenOfficeConfig.class], could not be registered. A bean with that name has already been defined in file [D:\admin\target\classes\com\web\openoffice\OpenOfficeConverter.class] and overriding is disabled.
Action:
Consider renaming one of the beans or enabling overriding by setting spring.main.allow-bean-definition-overriding=true
Disconnected from the target VM, address: '127.0.0.1:50132', transport: 'socket'
Process finished with exit code 0
就在application.properties 配置里面增加
spring.main.allow-bean-definition-overriding=true
如果报错
Caused by: java.lang.ClassNotFoundException: com.sun.star.lang.XEventListener
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 60 common frames omitted
加入maven
<!-- https://mvnrepository.com/artifact/org.openoffice/ridl --> <dependency> <groupId>org.openoffice</groupId> <artifactId>ridl</artifactId> <version>2.2.1</version> </dependency>
报错
Caused by: java.util.concurrent.ExecutionException: java.lang.NoClassDefFoundError: com/sun/star/comp/helper/Bootstrap
at java.util.concurrent.FutureTask.report(FutureTask.java:122)
at java.util.concurrent.FutureTask.get(FutureTask.java:192)
at org.artofsolving.jodconverter.office.ManagedOfficeProcess.startAndWait(ManagedOfficeProcess.java:62)
... 45 more
引入maven
<!-- https://mvnrepository.com/artifact/org.openoffice/juh --> <dependency> <groupId>org.openoffice</groupId> <artifactId>juh</artifactId> <version>2.2.1</version> </dependency>
报错
Caused by: java.lang.NoClassDefFoundError: com/sun/star/frame/XComponentLoader at org.artofsolving.jodconverter.AbstractConversionTask.loadDocument(AbstractConversionTask.java:86) at org.artofsolving.jodconverter.AbstractConversionTask.execute(AbstractConversionTask.java:59) at org.artofsolving.jodconverter.office.PooledOfficeManager$2.run(PooledOfficeManager.java:80) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run$$$capture(FutureTask.java:266) at java.util.concurrent.FutureTask.run(FutureTask.java) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) ... 1 more
引入
<!-- https://mvnrepository.com/artifact/org.openoffice/unoil --> <dependency> <groupId>org.openoffice</groupId> <artifactId>unoil</artifactId> <version>2.2.1</version> </dependency>