zoukankan      html  css  js  c++  java
  • word2Html

    package com.zxs.common;
    
    import java.io.BufferedWriter;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.OutputStreamWriter;
    
    import javax.xml.parsers.DocumentBuilderFactory;
    import javax.xml.parsers.ParserConfigurationException;
    import javax.xml.transform.OutputKeys;
    import javax.xml.transform.Transformer;
    import javax.xml.transform.TransformerException;
    import javax.xml.transform.TransformerFactory;
    import javax.xml.transform.dom.DOMSource;
    import javax.xml.transform.stream.StreamResult;
    
    import org.apache.commons.io.output.ByteArrayOutputStream;
    import org.apache.poi.hwpf.HWPFDocument;
    import org.apache.poi.hwpf.converter.PicturesManager;
    import org.apache.poi.hwpf.converter.WordToHtmlConverter;
    import org.apache.poi.hwpf.usermodel.PictureType;
    import org.w3c.dom.Document;
    /**
     * 
     * @author 
    * */ public class Word2Html { private static String outPictureDir = ""; private static final String ENCODING = "GB2312"; public static void main(String argv[]) { try { doc2Html("E://test//2.doc", "E://test//2.html"); } catch (Exception e) { e.printStackTrace(); } } private static void initOutDir(String outPutPath) { File file = new File(outPutPath); File outdir = file.getParentFile(); if(!outdir.exists()){ outdir.mkdirs(); } String outFileName = file.getName(); File pictureDir = new File(outdir, outFileName.substring(0, outFileName.lastIndexOf("."))); if(!pictureDir.exists()){ pictureDir.mkdirs(); } outPictureDir = pictureDir.getPath(); } /** * doc转换为html * * @param fileName * @param outPutFile * @throws TransformerException * @throws IOException * @throws ParserConfigurationException */ public static void doc2Html(String fileName, String outPutFile) throws TransformerException, IOException, ParserConfigurationException { long startTime = System.currentTimeMillis(); initOutDir(outPutFile); HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName)); WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance() .newDocumentBuilder().newDocument()); wordToHtmlConverter.setPicturesManager(new PicturesManager() { public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) { String path = new File(outPictureDir).getName() + "//"+suggestedName; try { FileOutputStream file = new FileOutputStream(outPictureDir +"//"+ suggestedName); file.write(content); file.close(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return path; } }); wordToHtmlConverter.processDocument(wordDocument); Document htmlDocument = wordToHtmlConverter.getDocument(); ByteArrayOutputStream out = new ByteArrayOutputStream(); DOMSource domSource = new DOMSource(htmlDocument); StreamResult streamResult = new StreamResult(out); TransformerFactory tf = TransformerFactory.newInstance(); Transformer serializer = tf.newTransformer(); serializer.setOutputProperty(OutputKeys.ENCODING, ENCODING); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); serializer.setOutputProperty(OutputKeys.METHOD, "html"); serializer.transform(domSource, streamResult); out.close(); writeFile(new String(out.toByteArray()).replaceAll("<span.*>\s*TOC\s*.*</span>", ""), outPutFile); System.out.println("Generate " + outPutFile + " with " + (System.currentTimeMillis() - startTime) + " ms."); } /** * 写文件 * * @param content * @param path */ public static void writeFile(String content, String path) { FileOutputStream fos = null; BufferedWriter bw = null; try { File file = new File(path); fos = new FileOutputStream(file); bw = new BufferedWriter(new OutputStreamWriter(fos, ENCODING)); bw.write(content); } catch (FileNotFoundException fnfe) { fnfe.printStackTrace(); } catch (IOException ioe) { ioe.printStackTrace(); } finally { try { if (bw != null) bw.close(); if (fos != null) fos.close(); } catch (IOException ie) { } } } }
  • 相关阅读:
    为什么Python是数据科学领域最受欢迎的语言之一?
    AOF持久化
    centos6更换yum源和epel源
    centos6更换yum源和epel源
    centos6更换yum源和epel源
    centos6更换yum源和epel源
    MySQL 备份与恢复
    MySQL 备份与恢复
    MySQL 备份与恢复
    MySQL 备份与恢复
  • 原文地址:https://www.cnblogs.com/zhangxuesong/p/5787751.html
Copyright © 2011-2022 走看看