zoukankan      html  css  js  c++  java
  • java把Word文件转成html的字符串返回出去

    1、需求:解析前端上传的 Word 文件,生成 HTML 字符串并返回给前端展示。Word 中的图片可以忽略、不予显示,因此下面的代码去掉了解析图片的部分。

    package com.lieni.core.util;
    
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    
    import javax.xml.parsers.DocumentBuilderFactory;
    import javax.xml.parsers.ParserConfigurationException;
    import javax.xml.transform.OutputKeys;
    import javax.xml.transform.Transformer;
    import javax.xml.transform.TransformerException;
    import javax.xml.transform.TransformerFactory;
    import javax.xml.transform.dom.DOMSource;
    import javax.xml.transform.stream.StreamResult;
    
    import org.apache.poi.hwpf.HWPFDocument;
    import org.apache.poi.hwpf.converter.WordToHtmlConverter;
    import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
    import org.apache.poi.xwpf.usermodel.XWPFDocument;
    import org.springframework.web.multipart.MultipartFile;
    import org.w3c.dom.Document;
    
    import com.itextpdf.text.log.Logger;
    import com.itextpdf.text.log.LoggerFactory;
    
    /**
     * Converts uploaded Word documents ({@code .docx} and {@code .doc}) into an HTML string.
     *
     * <p>No image handling is configured here: the {@code .docx} converter is called with
     * {@code null} options and the {@code .doc} converter is used as constructed, so embedded
     * pictures are not expected to be rendered.
     *
     * <p>Both entry points report invalid input by logging an error and returning {@code null};
     * they do not throw for a missing file or a wrong extension.
     *
     * <p>Created by LTmei on 2018/10/10 10:00
     */
    public class Word2HtmlUtil {

        /**
         * logger
         *
         * <p>NOTE(review): this is iText's logging facade (see the imports), not SLF4J/Log4j.
         * Confirm that is intended; it is kept as-is to avoid introducing a new dependency.
         */
        private static final Logger logger = LoggerFactory.getLogger(Word2HtmlUtil.class);

        /** Encoding used both to serialize the .doc HTML and to decode the resulting bytes. */
        private static final String OUTPUT_ENCODING = "utf-8";

        /**
         * Converts an uploaded Word 2007+ ({@code .docx}) file to an XHTML string.
         *
         * @param file the uploaded file; its original filename must end in {@code .docx}
         *             (compared case-insensitively)
         * @return the converted markup, or {@code null} if the file is null/empty or does not
         *         have a {@code .docx} extension
         * @throws IOException if the upload cannot be read or the conversion fails on I/O
         */
        public static String Word2007ToHtml(MultipartFile file) throws IOException {
            if (isMissing(file)) {
                logger.error("Sorry File does not Exists!");
                return null;
            }
            if (!hasExtension(file.getOriginalFilename(), ".docx")) {
                logger.error("Enter only MS Office 2007+ files");
                return null;
            }

            // try-with-resources guarantees the upload stream is released even when parsing
            // or conversion throws.
            try (InputStream in = file.getInputStream();
                    ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
                // 1) Load the Word document into an XWPFDocument.
                XWPFDocument document = new XWPFDocument(in);

                // 2) Render into an in-memory buffer; null means "no converter options".
                XHTMLConverter.getInstance().convert(document, baos, null);

                // Decodes with the platform default charset, as the original did.
                // NOTE(review): confirm which encoding XHTMLConverter writes before pinning
                // an explicit charset here.
                return baos.toString();
            }
        }

        /**
         * Converts an uploaded Word 97-2003 ({@code .doc}) file to an HTML string.
         *
         * @param file the uploaded file; its original filename must end in {@code .doc}
         *             (compared case-insensitively)
         * @return the converted HTML, or {@code null} if the file is null/empty or does not
         *         have a {@code .doc} extension
         * @throws IOException if the upload cannot be read
         * @throws ParserConfigurationException if no DOM document builder can be created
         * @throws TransformerException if the HTML DOM cannot be serialized
         */
        public static String Word2003ToHtml(MultipartFile file)
                throws IOException, ParserConfigurationException, TransformerException {
            if (isMissing(file)) {
                logger.error("Sorry File does not Exists!");
                return null;
            }
            if (!hasExtension(file.getOriginalFilename(), ".doc")) {
                logger.error("Enter only MS Office 2003 files");
                return null;
            }

            try (InputStream input = file.getInputStream();
                    ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
                HWPFDocument wordDocument = new HWPFDocument(input);
                WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                        DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

                // Parse the Word document into an HTML DOM.
                wordToHtmlConverter.processDocument(wordDocument);
                Document htmlDocument = wordToHtmlConverter.getDocument();

                // Serialize the DOM as indented HTML into the in-memory buffer.
                Transformer serializer = TransformerFactory.newInstance().newTransformer();
                serializer.setOutputProperty(OutputKeys.ENCODING, OUTPUT_ENCODING);
                serializer.setOutputProperty(OutputKeys.INDENT, "yes");
                serializer.setOutputProperty(OutputKeys.METHOD, "html");
                serializer.transform(new DOMSource(htmlDocument), new StreamResult(baos));

                // Decode with the same encoding the serializer was told to write; relying on
                // the platform default would garble non-ASCII text on non-UTF-8 JVMs.
                return baos.toString(OUTPUT_ENCODING);
            }
        }

        /** Returns true when there is no upload, or the upload carries no content. */
        private static boolean isMissing(MultipartFile file) {
            return file == null || file.isEmpty() || file.getSize() <= 0;
        }

        /**
         * Returns true when {@code filename} ends with {@code extension}, ignoring case.
         * A null filename (the client sent none) never matches.
         */
        private static boolean hasExtension(String filename, String extension) {
            if (filename == null) {
                return false;
            }
            // A negative start offset (name shorter than the extension) makes regionMatches
            // return false, so no separate length check is needed.
            int start = filename.length() - extension.length();
            return filename.regionMatches(true, start, extension, 0, extension.length());
        }

    }
  • 相关阅读:
    DirectX标准规定 DirectX和OpenGL的不同
    Android 抽屉效果的导航菜单实现
    Servlet基础(三) Servlet的多线程同步问题
    Java微服务之Spring Boot on Docker
    Spring Cloud 微服务架构学习笔记与示例
    从你的全世界路过—一群程序员的稻城亚丁游记
    从一个国内普通开发者的视角谈谈Sitecore
    吴军《硅谷来信》思维导图笔记
    .NET Core微服务之基于Jenkins+Docker实现持续部署(Part 1)
    2018OKR年中回顾
  • 原文地址:https://www.cnblogs.com/LTmei/p/9779275.html
Copyright © 2011-2022 走看看