zoukankan      html  css  js  c++  java
  • 《《《Java word转html

    转载地址:https://www.cnblogs.com/len0031/p/12108737.html

    java代码

      1 import org.apache.logging.log4j.LogManager;
      2 import org.apache.logging.log4j.Logger;
      3 import org.apache.poi.hwpf.HWPFDocument;
      4 import org.apache.poi.hwpf.converter.PicturesManager;
      5 import org.apache.poi.hwpf.converter.WordToHtmlConverter;
      6 import org.apache.poi.hwpf.usermodel.PictureType;
      7 import org.apache.poi.xwpf.converter.core.BasicURIResolver;
      8 import org.apache.poi.xwpf.converter.core.FileImageExtractor;
      9 import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
     10 import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
     11 import org.apache.poi.xwpf.usermodel.XWPFDocument;
     12 import org.springframework.stereotype.Controller;
     13 import org.springframework.web.bind.annotation.RequestMapping;
     14 import org.w3c.dom.Document;
     15 import javax.xml.parsers.DocumentBuilderFactory;
     16 import javax.xml.parsers.ParserConfigurationException;
     17 import javax.xml.transform.OutputKeys;
     18 import javax.xml.transform.Transformer;
     19 import javax.xml.transform.TransformerException;
     20 import javax.xml.transform.TransformerFactory;
     21 import javax.xml.transform.dom.DOMSource;
     22 import javax.xml.transform.stream.StreamResult;
     23 import java.io.*;
     24 @Controller
     25 @RequestMapping("/manual/")
     26 public class ManualController {
     27 
     28     private static final Logger logger = LogManager.getLogger(ManualController.class);
     29 
     30     /**
     31      * 将word2003转换为html文件
     32      *
     33      * @param wordPath word文件路径
     34      * @param wordName word文件名称无后缀
     35      * @param suffix   word文件后缀
     36      * @param htmlPath html存储地址
     37      * @throws IOException
     38      * @throws TransformerException
     39      * @throws ParserConfigurationException
     40      */
     41     public static String Word2003ToHtml(String wordPath, String wordName, String suffix, String htmlPath)
     42             throws IOException, TransformerException, ParserConfigurationException {
     43         String htmlName = wordName + ".html";
     44         final String imagePath = htmlPath + "image" + File.separator;
     45         // 判断html文件是否存在
     46         File htmlFile = new File(htmlPath + htmlName);
     47         if (htmlFile.exists()) {
     48             return htmlFile.getAbsolutePath();
     49         }
     50         // 原word文档
     51         final String file = wordPath + File.separator + wordName + suffix;
     52         InputStream input = new FileInputStream(new File(file));
     53         HWPFDocument wordDocument = new HWPFDocument(input);
     54         WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
     55                 DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
     56         // 设置图片存放的位置
     57         wordToHtmlConverter.setPicturesManager(new PicturesManager() {
     58             public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches,
     59                                       float heightInches) {
     60                 File imgPath = new File(imagePath);
     61                 if (!imgPath.exists()) {// 图片目录不存在则创建
     62                     imgPath.mkdirs();
     63                 }
     64                 File file = new File(imagePath + suggestedName);
     65                 try {
     66                     OutputStream os = new FileOutputStream(file);
     67                     os.write(content);
     68                     os.close();
     69                 } catch (FileNotFoundException e) {
     70                     e.printStackTrace();
     71                 } catch (IOException e) {
     72                     e.printStackTrace();
     73                 }
     74                 // 图片在html文件上的路径 相对路径
     75                 return "image/" + suggestedName;
     76             }
     77         });
     78         // 解析word文档
     79         wordToHtmlConverter.processDocument(wordDocument);
     80         Document htmlDocument = wordToHtmlConverter.getDocument();
     81         // 生成html文件上级文件夹
     82         File folder = new File(htmlPath);
     83         if (!folder.exists()) {
     84             folder.mkdirs();
     85         }
     86         OutputStream outStream = new FileOutputStream(htmlFile);
     87         DOMSource domSource = new DOMSource(htmlDocument);
     88         StreamResult streamResult = new StreamResult(outStream);
     89         TransformerFactory factory = TransformerFactory.newInstance();
     90         Transformer serializer = factory.newTransformer();
     91         serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
     92         serializer.setOutputProperty(OutputKeys.INDENT, "yes");
     93         serializer.setOutputProperty(OutputKeys.METHOD, "html");
     94         serializer.transform(domSource, streamResult);
     95         return htmlFile.getAbsolutePath();
     96     }
     97     /**
     98      *
     99      * 2007版本word转换成html
    100      *
    101      * @param wordPath  word文件路径
    102      * @param wordName word文件名称无后缀
    103      * @param suffix   word文件后缀
    104      * @param htmlPath html存储地址
    105      * @return
    106      * @throws IOException
    107      */
    108     public static String Word2007ToHtml(String wordPath, String wordName, String suffix, String htmlPath)
    109             throws IOException {
    110         String htmlName = wordName + ".html";
    111         String imagePath = htmlPath + "image" + File.separator;
    112         // 判断html文件是否存在
    113         File htmlFile = new File(htmlPath + htmlName);
    114         if (htmlFile.exists()) {
    115             return htmlFile.getAbsolutePath();
    116         }
    117         // word文件
    118         File wordFile = new File(wordPath + File.separator + wordName + suffix);
    119         // 1) 加载word文档生成 XWPFDocument对象
    120         InputStream in = new FileInputStream(wordFile);
    121         XWPFDocument document = new XWPFDocument(in);
    122         // 2) 解析 XHTML配置 (这里设置IURIResolver来设置图片存放的目录)
    123         File imgFolder = new File(imagePath);
    124         XHTMLOptions options = XHTMLOptions.create();
    125         options.setExtractor(new FileImageExtractor(imgFolder));
    126         // html中图片的路径 相对路径
    127         options.URIResolver(new BasicURIResolver("image"));
    128         options.setIgnoreStylesIfUnused(false);
    129         options.setFragment(true);
    130         // 3) 将 XWPFDocument转换成XHTML
    131         // 生成html文件上级文件夹
    132         File folder = new File(htmlPath);
    133         if (!folder.exists()) {
    134             folder.mkdirs();
    135         }
    136         OutputStream out = new FileOutputStream(htmlFile);
    137         XHTMLConverter.getInstance().convert(document, out, options);
    138         return htmlFile.getAbsolutePath();
    139     }
    140 
    141     public static void main(String[] args) {
    142         try {
    143             Word2007ToHtml("D:\Ning\word2html\", "33", ".docx", "D://Ning//word2html/");
    144         } catch (Exception e) {
    145             e.printStackTrace();
    146         }
    147     }
    148 }

    Maven 依赖(添加到 pom.xml 的 dependencies 节点中)

     1 <dependency>
     2             <groupId>org.apache.poi</groupId>
     3             <artifactId>poi-scratchpad</artifactId>
     4             <version>3.14</version>
     5         </dependency>
     6         <dependency>
     7             <groupId>org.apache.poi</groupId>
     8             <artifactId>poi-ooxml</artifactId>
     9             <version>3.14</version>
    10         </dependency>
    11         <dependency>
    12             <groupId>fr.opensagres.xdocreport</groupId>
    13             <artifactId>xdocreport</artifactId>
    14             <version>1.0.6</version>
    15         </dependency>
    16         <dependency>
    17             <groupId>org.apache.poi</groupId>
    18             <artifactId>poi-ooxml-schemas</artifactId>
    19             <version>3.14</version>
    20         </dependency>
    21         <dependency>
    22             <groupId>org.apache.poi</groupId>
    23             <artifactId>ooxml-schemas</artifactId>
    24             <version>1.3</version>
    25         </dependency>
    26         <dependency>
    27             <groupId>org.jsoup</groupId>
    28             <artifactId>jsoup</artifactId>
    29             <version>1.11.3</version>
    30         </dependency>

     运行 main 方法前,需要自己新建一个测试 docx 文件(示例代码读取的是 D:\Ning\word2html\33.docx)

    找到生成文件路径

     生成的图片位置:html 存储目录下的 image 文件夹

    打开生成的 html 文档(文档中的图片地址是指向 image 文件夹的相对路径)

     

  • 相关阅读:
    穿越之我是码农 1024 篇
    误删文件机房停电黑客入侵_你最怕什么?
    AI觉醒进行时:程序员你怕了吗?
    未来已来!阿里小蜜AI技术揭秘
    千人千面智能淘宝店铺背后的算法研究登陆人工智能顶级会议AAAI 2017
    CDN缓存不命中排查
    现实需求巨大_技术尚未成熟_学界与业界思维大碰撞
    围观阿里云最会赚钱的人!价值2万元邀请码不限量发送
    今晚19:30直播阿里巴巴大规模持续集成的技术演进之路_欢迎免费观看
    工作压力山大?码农这么减压最有效
  • 原文地址:https://www.cnblogs.com/lidar/p/14277978.html
Copyright © 2011-2022 走看看