zoukankan      html  css  js  c++  java
  • 使用POI把Word Excel转为HTML

    此方法是针对Office2003的,但是word中如果有图片,图片能够解析出来但是HTML文件中不显示。也不支持excel中的图片解析。

    所需jar包如下:

    1:PoiUtil.java

    package com.wzh.poi;
    
    import java.io.BufferedWriter;
    import java.io.ByteArrayOutputStream;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStreamWriter;
    import java.io.UnsupportedEncodingException;
    import java.util.List;
    import java.util.logging.Level;
    import java.util.logging.Logger;
    import javax.xml.parsers.DocumentBuilderFactory;
    import javax.xml.parsers.ParserConfigurationException;
    import javax.xml.transform.OutputKeys;
    import javax.xml.transform.Transformer;
    import javax.xml.transform.TransformerConfigurationException;
    import javax.xml.transform.TransformerException;
    import javax.xml.transform.TransformerFactory;
    import javax.xml.transform.dom.DOMSource;
    import javax.xml.transform.stream.StreamResult;
    import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
    import org.apache.poi.hssf.usermodel.HSSFWorkbook;
    import org.apache.poi.hwpf.HWPFDocument;
    import org.apache.poi.hwpf.converter.PicturesManager;
    import org.apache.poi.hwpf.converter.WordToHtmlConverter;
    import org.apache.poi.hwpf.usermodel.Picture;
    import org.apache.poi.hwpf.usermodel.PictureType;
    import org.w3c.dom.Document;
    
    /**
     * Converts legacy binary Office files (.xls workbooks and .doc documents)
     * to HTML files using the Apache POI converters.
     *
     * @date 2015-3-16 17:22:05
     * @author y
     */
    public class PoiUtil {

        /** Encoding used for serializing the HTML DOM and for writing the output file. */
        private static final String HTML_ENCODING = "UTF-8";

        /**
         * Converts an Excel (.xls) workbook to an HTML file.
         *
         * <p>Column headers and row numbers are omitted from the output. The
         * {@code <h2>} headings for the default sheet names {@code Sheet1} to
         * {@code Sheet5} are stripped; headings with any other sheet name are kept.
         *
         * @param fileName path of the .xls file to read
         * @param outputFile path of the HTML file to write
         * @throws FileNotFoundException if {@code fileName} cannot be opened
         * @throws IOException if reading the workbook fails
         * @throws ParserConfigurationException if no DOM document builder can be created
         * @throws TransformerConfigurationException if no XML transformer can be created
         * @throws TransformerException if serializing the HTML document fails
         */
        public static void excelToHtml(String fileName, String outputFile)
                throws FileNotFoundException, IOException, ParserConfigurationException,
                    TransformerConfigurationException, TransformerException {
            HSSFWorkbook excelBook;
            InputStream is = new FileInputStream(fileName);
            try {
                excelBook = new HSSFWorkbook(is);
            } finally {
                // The workbook has been loaded by the constructor; release the file handle.
                is.close();
            }

            ExcelToHtmlConverter ethc = new ExcelToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            ethc.setOutputColumnHeaders(false);
            ethc.setOutputRowNumbers(false);

            ethc.processWorkbook(excelBook);

            String htmlStr = toHtmlString(ethc.getDocument());

            // Drop the per-sheet headings, but only for the default sheet names.
            htmlStr = htmlStr.replace("<h2>Sheet1</h2>", "")
                             .replace("<h2>Sheet2</h2>", "")
                             .replace("<h2>Sheet3</h2>", "")
                             .replace("<h2>Sheet4</h2>", "")
                             .replace("<h2>Sheet5</h2>", "");

            writeFile(htmlStr, outputFile);
        }

        /**
         * Converts a Word (.doc) document to an HTML file.
         *
         * <p>Every picture in the document's pictures table is also written to a
         * file named by {@code Picture.suggestFullFileName()}, resolved against the
         * current working directory.
         *
         * @param fileName path of the .doc file to read
         * @param outputFile path of the HTML file to write
         * @throws IOException if reading the document or writing a picture fails
         * @throws ParserConfigurationException if no DOM document builder can be created
         * @throws TransformerException if serializing the HTML document fails
         */
        public static void wordToHtml(String fileName, String outputFile) throws
                IOException, ParserConfigurationException, TransformerException {
            HWPFDocument wordDoc;
            InputStream is = new FileInputStream(fileName);
            try {
                wordDoc = new HWPFDocument(is);
            } finally {
                // The document has been loaded by the constructor; release the file handle.
                is.close();
            }

            WordToHtmlConverter wthc = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

            wthc.setPicturesManager(new PicturesManager() {

                @Override
                public String savePicture(byte[] bytes, PictureType pt, String string, float f, float f1) {
                    // Nothing is saved here; the third argument is returned unchanged.
                    return string;
                }

            });

            wthc.processDocument(wordDoc);

            // NOTE(review): pictures are written under suggestFullFileName() relative to
            // the working directory, while the HTML uses the name returned by savePicture
            // above. These may not match, which would explain images not showing in the
            // generated HTML - confirm before relying on embedded images.
            List<Picture> pics = wordDoc.getPicturesTable().getAllPictures();
            if (null != pics) {
                for (Picture pic : pics) {
                    FileOutputStream picOut = new FileOutputStream(pic.suggestFullFileName());
                    try {
                        pic.writeImageContent(picOut);
                    } finally {
                        picOut.close();
                    }
                }
            }

            writeFile(toHtmlString(wthc.getDocument()), outputFile);
        }

        /**
         * Writes text to a file encoded as UTF-8, replacing any existing content.
         *
         * <p>I/O failures are logged at {@code SEVERE} level and not propagated.
         *
         * @param content text to write
         * @param path path of the file to write
         */
        public static void writeFile(String content, String path) {
            FileOutputStream fos = null;
            BufferedWriter bw = null;

            try {
                fos = new FileOutputStream(new File(path));
                bw = new BufferedWriter(new OutputStreamWriter(fos, HTML_ENCODING));
                bw.write(content);
            } catch (IOException ex) {
                // Also covers FileNotFoundException and UnsupportedEncodingException.
                Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                // Close each resource in its own try so that a failure while flushing
                // the writer cannot leave the underlying file stream open.
                if (null != bw) {
                    try {
                        bw.close();
                    } catch (IOException ex) {
                        Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
                if (null != fos) {
                    try {
                        fos.close();
                    } catch (IOException ex) {
                        Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }
        }

        /**
         * Serializes a DOM document as indented HTML and returns it as a string.
         *
         * <p>The bytes are produced and decoded with the same encoding, so the
         * result does not depend on the platform default charset.
         *
         * @param htmlDocument DOM document to serialize
         * @return the HTML markup
         * @throws TransformerException if the transformer cannot be created or fails
         * @throws IOException if the encoding is not supported
         */
        private static String toHtmlString(Document htmlDocument)
                throws TransformerException, IOException {
            ByteArrayOutputStream out = new ByteArrayOutputStream();

            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, HTML_ENCODING);
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

            return out.toString(HTML_ENCODING);
        }
    }

    2.Test.java

    import com.wzh.poi.PoiUtil;
    import java.io.IOException;
    import java.util.logging.Level;
    import java.util.logging.Logger;
    import javax.xml.parsers.ParserConfigurationException;
    import javax.xml.transform.TransformerException;
    
    
    
    /*
     * To change this license header, choose License Headers in Project Properties.
     * To change this template file, choose Tools | Templates
     * and open the template in the editor.
     */
    
    /**
     *
     * @author y
     */
    public class Test {

        /**
         * Converts the sample workbook to HTML and logs any conversion failure.
         *
         * @param args the command line arguments (not used)
         */
        public static void main(String[] args) {
            final String workbookPath = "t2.xls";
            final String htmlPath = "test.html";
            final Logger logger = Logger.getLogger(Test.class.getName());

            try {
                PoiUtil.excelToHtml(workbookPath, htmlPath);
            } catch (IOException ioFailure) {
                logger.log(Level.SEVERE, null, ioFailure);
            } catch (ParserConfigurationException parserFailure) {
                logger.log(Level.SEVERE, null, parserFailure);
            } catch (TransformerException transformFailure) {
                logger.log(Level.SEVERE, null, transformFailure);
            }
        }

    }
  • 相关阅读:
    保险行业电话外呼型呼叫中心方案
    12355青少年服务台呼叫中心解决方案
    未能找到类型集或命名空间名称 "xxxxxx" (是否缺少using 指令或引用?)
    Smarty中section的使用
    什么是Asterisk,它如何帮助我们的呼叫中心?
    高效呼叫中心的8个健康工作习惯
    Python 爬取数据时 'gbk' codec can't encode character '\xa0' 的问题
    Python 网页解析器
    Python 爬虫入门3种方法
    Python open 读写小栗子
  • 原文地址:https://www.cnblogs.com/yshyee/p/4342717.html
Copyright © 2011-2022 走看看