zoukankan      html  css  js  c++  java
  • java将Word文件转换为html文件

    我这里是maven项目,只需在pom.xml中配置依赖,Maven会自动下载jar包

    在pom.xml中配置

    <!-- Word-to-HTML conversion, Apache POI scratchpad artifact:
         https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
                <dependency>
                    <groupId>org.apache.poi</groupId>
                    <artifactId>poi-scratchpad</artifactId>
                    <version>3.17</version>
                </dependency>
                        
                <!-- Apache POI OOXML artifact (kept at the same 3.17 version as poi-scratchpad):
                     https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
                <dependency>
                    <groupId>org.apache.poi</groupId>
                    <artifactId>poi-ooxml</artifactId>
                    <version>3.17</version>
                </dependency>
                    
                <!-- XDocReport docx (XWPF) converter artifact:
                     https://mvnrepository.com/artifact/fr.opensagres.xdocreport/fr.opensagres.xdocreport.converter.docx.xwpf -->
                <dependency>
                    <groupId>fr.opensagres.xdocreport</groupId>
                    <artifactId>fr.opensagres.xdocreport.converter.docx.xwpf</artifactId>
                    <version>2.0.1</version>
                </dependency>

    java代码

    package com.lmt.service.file;
    
    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.io.InputStream;
    import java.util.UUID;
    
    import javax.xml.parsers.DocumentBuilderFactory;
    import javax.xml.parsers.ParserConfigurationException;
    import javax.xml.transform.OutputKeys;
    import javax.xml.transform.Transformer;
    import javax.xml.transform.TransformerException;
    import javax.xml.transform.TransformerFactory;
    import javax.xml.transform.dom.DOMSource;
    import javax.xml.transform.stream.StreamResult;
    
    import org.apache.poi.hwpf.HWPFDocument;
    import org.apache.poi.hwpf.converter.PicturesManager;
    import org.apache.poi.hwpf.converter.WordToHtmlConverter;
    import org.apache.poi.hwpf.usermodel.PictureType;
    import org.apache.poi.util.IOUtils;
    import org.apache.poi.xwpf.usermodel.XWPFDocument;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.stereotype.Component;
    import org.springframework.web.multipart.MultipartFile;
    import org.w3c.dom.Document;
    
    
    import fr.opensagres.poi.xwpf.converter.core.ImageManager;
    import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
    import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
    
    @Component
    public class WordToHtml {
        private static final Logger logger = LoggerFactory.getLogger(WordToHtml.class);
        
        
        @Autowired
        private ParseFile parseFile;
        
        public File convert(MultipartFile file) {
            String filename = file.getOriginalFilename();
            String suffix=filename.substring(filename.lastIndexOf("."));
            String newName=UUID.randomUUID().toString();
            // TODO 需要保存在一个新的位置
            File convFile = new File("D:/test/" + newName +suffix);
            FileOutputStream fos = null;
            try {
                convFile.createNewFile(); 
                fos = new FileOutputStream(convFile); 
                fos.write(file.getBytes());
            } catch (IOException ex) {
                logger.error("上传文件出错!", ex);
                return null;
            } finally {
                IOUtils.closeQuietly(fos);
            }
            
            // 输入文件名的所在文件夹
            // 加上反斜杠
            String parentDirectory = convFile.getParent();
            if (!parentDirectory.endsWith("\")) {
                parentDirectory = parentDirectory + "\";
            }
            
            if (filename.endsWith(".docx")) {
                return docxConvert(parentDirectory, convFile.getAbsolutePath(),newName);
            } else if (filename.endsWith(".doc")) {
                return docConvert(parentDirectory, convFile.getAbsolutePath(),newName);
            } else {
                logger.error("不支持的文件格式!");
                return null;
            }
        }
        
        
        private File docxConvert(String parentDirectory, String filename,String newName) {
            try {
                XWPFDocument document = new XWPFDocument(new FileInputStream(filename));
                XHTMLOptions options = XHTMLOptions.create().setImageManager(new ImageManager(new File(parentDirectory), UUID.randomUUID().toString())).indent(4);
                FileOutputStream out = new FileOutputStream(new File(parentDirectory + newName+ ".html"));
                XHTMLConverter.getInstance().convert(document, out, options);
                return new File(parentDirectory + newName+ ".html");
            } catch (IOException ex) {
                logger.error("word转化出错!", ex);
                return null;
            }
            
        }
        
        
        private File docConvert(String parentDirectory, String filename,String newName) {
            try {
                HWPFDocument document = new HWPFDocument(new FileInputStream(filename));
                WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                        DocumentBuilderFactory.newInstance().newDocumentBuilder()
                                .newDocument());
                
                // converter默认对图片不作处理,需要手动下载图片并嵌入到html中
                 wordToHtmlConverter.setPicturesManager(new PicturesManager() {
                        @Override
                        public String savePicture(byte[] bytes, PictureType pictureType, String s, float v, float v1) {
                            String imageFilename = parentDirectory + "";
                            String identity=UUID.randomUUID().toString();
                            File imageFile = new File(imageFilename, identity+s);
                            imageFile.getParentFile().mkdirs();
                            InputStream in = null;
                            FileOutputStream out = null;
    
                            try {
                                in = new ByteArrayInputStream(bytes);
                                out = new FileOutputStream(imageFile);
                                IOUtils.copy(in, out);
    
                            } catch (IOException ex) {
                                logger.error("word转化出错!", ex);
                            } finally {
                                if (in != null) {
                                    IOUtils.closeQuietly(in);
                                }
    
                                if (out != null) {
                                    IOUtils.closeQuietly(out);
                                }
    
                            }
                            return imageFile.getName();
                        }
                    });
                
                wordToHtmlConverter.processDocument(document);
                Document htmlDocument = wordToHtmlConverter.getDocument();
                ByteArrayOutputStream out = new ByteArrayOutputStream();
                DOMSource domSource = new DOMSource(htmlDocument);
                StreamResult streamResult = new StreamResult(out);
    
                TransformerFactory tf = TransformerFactory.newInstance();
                Transformer serializer = tf.newTransformer();
                serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
                serializer.setOutputProperty(OutputKeys.INDENT, "yes");
                serializer.setOutputProperty(OutputKeys.METHOD, "html");
                serializer.transform(domSource, streamResult);
                out.close();
    
                String result = new String(out.toByteArray());
                FileWriter writer = new FileWriter(parentDirectory + newName + ".html");
                writer.write(result);
                writer.close();                        
            } catch (IOException | TransformerException | ParserConfigurationException ex) {
                logger.error("word转化出错!", ex);
            }
            return new File(parentDirectory + newName + ".html");
        }
        
        /**
         * 将上传的Word文档转化成HTML字符串
         * @param attachfile
         * @return
         */
        public String convertToHtml(MultipartFile attachfile) {
            String wordContent = "";
            // 将Word文件转换为html
            File file = convert(attachfile);
            // 读取html文件
            if (file != null) {
                wordContent = parseFile.readHtml(file);
            }
            return wordContent;
        }
        
    }
  • 相关阅读:
    剖析C语言中a=a+++++a的无聊问题
    [转]精确到1%秒的单片机计时器汇编程序
    [转]学DSP、FPGA、ARM,哪个更有前途?
    【Java】Eclipse导出JAR包
    二维码生成器(支持历史记录点击和清空)
    移动端开发注意之一二
    localStorage实现按钮点击禁用
    JavaScript之查找元素
    扒拉扒拉table
    解惑之JavaScript
  • 原文地址:https://www.cnblogs.com/chen-yun/p/9274242.html
Copyright © 2011-2022 走看看