zoukankan      html  css  js  c++  java
  • word2html文件

     1 package com.wesib2b.lw.app.util;
     2 
     3 import java.io.ByteArrayOutputStream;
     4 import java.io.File;
     5 import java.io.FileInputStream;
     6 import java.io.FileNotFoundException;
     7 import java.io.FileOutputStream;
     8 import java.io.InputStream;
     9 import java.util.List;
    10 import javax.xml.parsers.DocumentBuilderFactory;
    11 import javax.xml.transform.OutputKeys;
    12 import javax.xml.transform.Transformer;
    13 import javax.xml.transform.TransformerFactory;
    14 import javax.xml.transform.dom.DOMSource;
    15 import javax.xml.transform.stream.StreamResult;
    16 import org.apache.commons.io.FileUtils;
    17 import org.apache.poi.hwpf.HWPFDocument;
    18 import org.apache.poi.hwpf.converter.PicturesManager;
    19 import org.apache.poi.hwpf.converter.WordToHtmlConverter;
    20 import org.apache.poi.hwpf.usermodel.Picture;
    21 import org.apache.poi.hwpf.usermodel.PictureType;
    22 import org.w3c.dom.Document;
    23 
    24 public class test {
    25     public static void main(String[] args) throws Throwable {
    26         final String path = "D:\";
    27         final String file = "aaa.doc";
    28         InputStream input = new FileInputStream(path + file);
    29         HWPFDocument wordDocument = new HWPFDocument(input);
    30         WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
    31                 DocumentBuilderFactory.newInstance().newDocumentBuilder()
    32                         .newDocument());
    33         wordToHtmlConverter.setPicturesManager(new PicturesManager() {
    34             public String savePicture(byte[] content, PictureType pictureType,
    35                     String suggestedName, float widthInches, float heightInches) {
    36                 return suggestedName;
    37             }
    38         });
    39         wordToHtmlConverter.processDocument(wordDocument);
    40         List pics = wordDocument.getPicturesTable().getAllPictures();
    41         if (pics != null) {
    42             for (int i = 0; i < pics.size(); i++) {
    43                 Picture pic = (Picture) pics.get(i);
    44                 try {
    45                     pic.writeImageContent(new FileOutputStream(path
    46                             + pic.suggestFullFileName()));
    47                 } catch (FileNotFoundException e) {
    48                     e.printStackTrace();
    49                 }
    50             }
    51         }
    52         Document htmlDocument = wordToHtmlConverter.getDocument();
    53         ByteArrayOutputStream outStream = new ByteArrayOutputStream();
    54         DOMSource domSource = new DOMSource(htmlDocument);
    55         StreamResult streamResult = new StreamResult(outStream);
    56         TransformerFactory tf = TransformerFactory.newInstance();
    57         Transformer serializer = tf.newTransformer();
    58         serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
    59         serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    60         serializer.setOutputProperty(OutputKeys.METHOD, "html");
    61         serializer.transform(domSource, streamResult);
    62         outStream.close();
    63         String content = new String(outStream.toByteArray());
    64         FileUtils.write(new File(path, "1.html"), content, "utf-8");
    65     }
    66 }
  • 相关阅读:
    『深度应用』NLP机器翻译深度学习实战课程·零(基础概念)
    Solr初始
    Hadoop简介
    lucene的分词器宝典
    Lucene 更新、删除、分页操作以及IndexWriter优化
    Lucene 初步 之 HelloWorld
    lucene介绍和存储介绍
    Spring 集成 RabbitMQ
    RabbitMQ 之消息确认机制(事务+Confirm)
    RabbitMQ 的路由模式 Topic模式
  • 原文地址:https://www.cnblogs.com/jason123/p/7039937.html
Copyright © 2011-2022 走看看