zoukankan      html  css  js  c++  java
  • POI操作word和html相互转化

    下面是两个类:第一个类将 html 转为 word,第二个类将 word 转为 html(最下面附上 jar 包下载链接)

    package com.wz.poi.wordHtml;

    /**
    * 2018/4/24
    * @author Administrator
    *
    */

    import java.io.BufferedReader;
    import java.io.ByteArrayInputStream;
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.io.OutputStream;

    import org.apache.poi.poifs.filesystem.POIFSFileSystem;

    /**
     * Combines an HTML body file and a CSS file into a single HTML page and stores
     * that page inside an OLE2 (POIFS) container written to a caller-chosen path.
     */
    public class HtmlToWord {

        /** Size of the character buffer used when draining a text stream. */
        private static final int BUFFER_SIZE = 4096;

        /**
         * Public entry point: reads the HTML body and the stylesheet, wraps them in a
         * minimal HTML page, and writes the result to {@code wordPath}.
         *
         * <p>The two input files are always decoded as UTF-8; {@code code} only
         * controls how the assembled page is encoded before it is stored.
         *
         * @param htmlPath path of the HTML file providing the body markup
         * @param cssPath  path of the CSS file placed inside the {@code <style>} element
         * @param wordPath path of the output file to create
         * @param code     charset name used to encode the assembled page (usually utf-8)
         * @throws Exception if a file cannot be read or written, or {@code code} is
         *                   not a supported charset name
         */
        public void htmlToWord2(String htmlPath, String cssPath, String wordPath, String code) throws Exception {
            String body;
            String css;
            // Both inputs are closed as soon as their text has been read, even on failure.
            try (InputStream bodyIs = new FileInputStream(htmlPath);
                    InputStream cssIs = new FileInputStream(cssPath)) {
                body = getContent(bodyIs);
                css = getContent(cssIs);
            }

            // Assemble a complete HTML page around the fragment and stylesheet.
            String content = "<html><head><style>" + css + "</style></head><body>" + body + "</body></html>";

            // Encode before opening the output so an unsupported charset does not
            // leave an empty file behind.
            byte[] encoded = content.getBytes(code);
            try (InputStream is = new ByteArrayInputStream(encoded);
                    OutputStream os = new FileOutputStream(wordPath)) {
                inputStreamToWord(is, os);
            }
        }

        /**
         * Stores everything readable from {@code is} as the POIFS entry named
         * {@code "WordDocument"} and writes the resulting file system to {@code os}.
         *
         * <p>The streams are not closed here; the caller owns them. The temporary
         * POIFS file system is always closed, even when writing fails.
         *
         * <p>NOTE(review): the entry receives the raw HTML bytes, not Word's binary
         * format; presumably this relies on the consuming application accepting
         * HTML content - confirm with the intended reader of the file.
         *
         * @param is source of the bytes to store
         * @param os destination that receives the serialized POIFS file system
         * @throws IOException if reading the source or writing the destination fails
         */
        private void inputStreamToWord(InputStream is, OutputStream os) throws IOException {
            POIFSFileSystem fs = new POIFSFileSystem();
            try {
                // Entry name; the original author notes it corresponds to
                // org.apache.poi.hdf.extractor.WordDocument.
                fs.createDocument(is, "WordDocument");
                fs.writeFilesystem(os);
            } finally {
                fs.close();
            }
        }

        /**
         * Reads the given streams in order, decoding each as UTF-8, and returns the
         * concatenated text.
         *
         * <p>Characters are copied verbatim, so line breaks in the input are kept
         * (reading line by line and appending without separators would fuse the
         * last word of one line with the first word of the next).
         * The streams are not closed by this method.
         *
         * @param ises streams to read, in order
         * @return the decoded text of all streams, or {@code null} if {@code ises}
         *         itself is {@code null}
         * @throws IOException if reading any stream fails
         */
        private String getContent(InputStream... ises) throws IOException {
            if (ises == null) {
                return null;
            }
            StringBuilder result = new StringBuilder();
            char[] buffer = new char[BUFFER_SIZE];
            for (InputStream is : ises) {
                BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
                int count;
                while ((count = br.read(buffer)) != -1) {
                    result.append(buffer, 0, count);
                }
            }
            return result.toString();
        }

    }

    package com.wz.poi.wordHtml;

    /**
    * 2018/4/24
    * @author Administrator
    *
    */

    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;

    import javax.xml.parsers.DocumentBuilderFactory;
    import javax.xml.parsers.ParserConfigurationException;
    import javax.xml.transform.OutputKeys;
    import javax.xml.transform.Transformer;
    import javax.xml.transform.TransformerException;
    import javax.xml.transform.TransformerFactory;
    import javax.xml.transform.dom.DOMSource;
    import javax.xml.transform.stream.StreamResult;

    import org.apache.poi.hwpf.HWPFDocument;
    import org.apache.poi.hwpf.converter.PicturesManager;
    import org.apache.poi.hwpf.converter.WordToHtmlConverter;
    import org.apache.poi.hwpf.usermodel.PictureType;
    import org.apache.poi.xwpf.converter.core.BasicURIResolver;
    import org.apache.poi.xwpf.converter.core.FileImageExtractor;
    import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
    import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
    import org.apache.poi.xwpf.usermodel.XWPFDocument;
    import org.w3c.dom.Document;

    public class WordToHtml {

    /**
    * 调用的模板
    * @param args
    */
    public static void main(String[] args) {
    try {
    Word2003ToHtml("H:\MyTest\Java\","test",".doc");
    } catch (IOException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
    } catch (TransformerException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
    } catch (ParserConfigurationException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
    }
    }

    /**
    * 将word2003转换为html文件 2017-2-27
    * @param wordPath word文件路径
    * @param wordName word文件名称无后缀
    * @param suffix word文件后缀
    * @throws IOException
    * @throws TransformerException
    * @throws ParserConfigurationException
    */
    public static String Word2003ToHtml(String wordPath,String wordName,String suffix) throws IOException, TransformerException, ParserConfigurationException {
    String htmlPath = wordPath + File.separator + wordName + "_show" + File.separator;
    String htmlName = wordName + ".html";
    final String imagePath = htmlPath + "image" + File.separator;

    //判断html文件是否存在
    File htmlFile = new File(htmlPath + htmlName);
    if(htmlFile.exists()){
    return htmlFile.getAbsolutePath();
    }

    //原word文档
    final String file = wordPath + File.separator + wordName + suffix;
    InputStream input = new FileInputStream(new File(file));

    HWPFDocument wordDocument = new HWPFDocument(input);
    WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
    //设置图片存放的位置
    wordToHtmlConverter.setPicturesManager(new PicturesManager() {
    public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {
    File imgPath = new File(imagePath);
    if(!imgPath.exists()){//图片目录不存在则创建
    imgPath.mkdirs();
    }
    File file = new File(imagePath + suggestedName);
    try {
    OutputStream os = new FileOutputStream(file);
    os.write(content);
    os.close();
    } catch (FileNotFoundException e) {
    e.printStackTrace();
    } catch (IOException e) {
    e.printStackTrace();
    }
    //图片在html文件上的路径 相对路径
    return "image/" + suggestedName;
    }
    });

    //解析word文档
    wordToHtmlConverter.processDocument(wordDocument);
    Document htmlDocument = wordToHtmlConverter.getDocument();

    //生成html文件上级文件夹
    File folder = new File(htmlPath);
    if(!folder.exists()){
    folder.mkdirs();
    }

    //生成html文件地址
    OutputStream outStream = new FileOutputStream(htmlFile);

    DOMSource domSource = new DOMSource(htmlDocument);
    StreamResult streamResult = new StreamResult(outStream);

    TransformerFactory factory = TransformerFactory.newInstance();
    Transformer serializer = factory.newTransformer();
    serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    serializer.setOutputProperty(OutputKeys.METHOD, "html");

    serializer.transform(domSource, streamResult);

    outStream.close();

    return htmlFile.getAbsolutePath();
    }

    /**
    * 2007版本word转换成html 2017-2-27
    * @param wordPath word文件路径
    * @param wordName word文件名称无后缀
    * @param suffix word文件后缀
    * @return
    * @throws IOException
    */
    public String Word2007ToHtml(String wordPath,String wordName,String suffix) throws IOException {
    String htmlPath = wordPath + File.separator + wordName + "_show" + File.separator;
    String htmlName = wordName + ".html";
    String imagePath = htmlPath + "image" + File.separator;

    //判断html文件是否存在
    File htmlFile = new File(htmlPath + htmlName);
    if(htmlFile.exists()){
    return htmlFile.getAbsolutePath();
    }

    //word文件
    File wordFile = new File(wordPath + File.separator + wordName + suffix);

    // 1) 加载word文档生成 XWPFDocument对象
    InputStream in = new FileInputStream(wordFile);
    XWPFDocument document = new XWPFDocument(in);

    // 2) 解析 XHTML配置 (这里设置IURIResolver来设置图片存放的目录)
    File imgFolder = new File(imagePath);
    XHTMLOptions options = XHTMLOptions.create();
    options.setExtractor(new FileImageExtractor(imgFolder));
    //html中图片的路径 相对路径
    options.URIResolver(new BasicURIResolver("image"));
    options.setIgnoreStylesIfUnused(false);
    options.setFragment(true);

    // 3) 将 XWPFDocument转换成XHTML
    //生成html文件上级文件夹
    File folder = new File(htmlPath);
    if(!folder.exists()){
    folder.mkdirs();
    }
    OutputStream out = new FileOutputStream(htmlFile);
    XHTMLConverter.getInstance().convert(document, out, options);

    return htmlFile.getAbsolutePath();
    }

    }

    附上百度网盘下载链接:

    链接:https://pan.baidu.com/s/1t_jXUq3CuhZo9j_UI4URAQ 密码:r2qi

  • 相关阅读:
    《Linux内核设计与实现》读书笔记(4) 中断和中断处理程序
    《Linux内核设计与实现》读书笔记(11) 内存管理(1)
    《Linux内核设计与实现》读书笔记(10) 定时器和时间管理(2)
    《Linux内核设计与实现》读书笔记(7) 内核同步方法(1)
    《Linux内核设计与实现》读书笔记(9) 定时器和时间管理(1)
    Mac os 10.7.1(Lion) 下vmware fusion里的windows有时无法工作在NAT模式下的问题
    偶然发现7年前受到的offer。7年,一晃而过
    IE下对文件(图片)进行base64转换
    IE这个bug真是弱爆了
    腾讯举办创意马拉松活动庆祝成立14周年
  • 原文地址:https://www.cnblogs.com/wadmwz/p/8926737.html
Copyright © 2011-2022 走看看