zoukankan      html  css  js  c++  java
  • [JavaWeb基础] 025.JAVA把word转换成html

    用第三方库 Apache POI 把 Word 文档转换成 HTML,下面直接上代码。

    package com.babybus.sdteam.wordtopdf;
    
    import java.io.BufferedWriter;
    import java.io.ByteArrayOutputStream;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.OutputStreamWriter;
    import java.util.List;
    
    import javax.xml.parsers.DocumentBuilderFactory;
    import javax.xml.parsers.ParserConfigurationException;
    import javax.xml.transform.OutputKeys;
    import javax.xml.transform.Transformer;
    import javax.xml.transform.TransformerConfigurationException;
    import javax.xml.transform.TransformerException;
    import javax.xml.transform.TransformerFactory;
    import javax.xml.transform.dom.DOMSource;
    import javax.xml.transform.stream.StreamResult;
    
    import org.apache.poi.hwpf.usermodel.Picture;
    import org.apache.poi.hwpf.HWPFDocument;
    import org.apache.poi.hwpf.converter.PicturesManager;
    import org.apache.poi.hwpf.converter.WordToHtmlConverter;
    import org.apache.poi.hwpf.usermodel.PictureType;
    import org.jsoup.Jsoup;
    
    import org.w3c.dom.Document;
    
    public class WordToHtml {
    
    	/**
    	 * Charset used to serialize the DOM, to decode the serialized bytes and
    	 * to write the HTML file. All three steps must agree, otherwise
    	 * non-ASCII text is garbled.
    	 */
    	private static final String ENCODING = "GB2312";
    
    	/** Location of the generated HTML file. */
    	private static final String HTML_PATH = "D://test//1.html";
    
    	/** Directory the pictures embedded in the Word document are written to. */
    	private static final String PICTURE_DIR = "D:/test/";
    
    	/**
    	 * Converts a binary Word document (.doc) to an HTML file.
    	 * <p>
    	 * The embedded pictures are written to {@code D:/test/} and the HTML is
    	 * written to {@code D://test//1.html}, encoded as GB2312.
    	 * 
    	 * @param path
    	 *            path of the .doc file to convert
    	 * @return the path of the generated HTML file
    	 * @throws FileNotFoundException
    	 *             if the Word file cannot be opened
    	 * @throws IOException
    	 *             if reading the Word file or writing a picture fails
    	 * @throws ParserConfigurationException
    	 *             if no DOM document builder can be created
    	 * @throws TransformerException
    	 *             if the HTML DOM cannot be serialized
    	 */
    	public static String convertWordToHtml(String path)
    			throws FileNotFoundException, IOException,
    			ParserConfigurationException, TransformerException {
    		// HWPFDocument only understands the binary .doc format; .docx
    		// (Word 2007+) files need XWPFDocument from poi-ooxml instead.
    		HWPFDocument wordDocument;
    		FileInputStream wordIn = new FileInputStream(path);
    		try {
    			// The constructor reads the whole stream into memory, so the
    			// file handle can be released right after construction.
    			wordDocument = new HWPFDocument(wordIn);
    		} finally {
    			wordIn.close();
    		}
    
    		// The converter builds the HTML as a DOM tree in an empty document.
    		WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
    				DocumentBuilderFactory.newInstance().newDocumentBuilder()
    						.newDocument());
    		// The pictures manager only decides which URL ends up in the <img>
    		// tags; the picture bytes themselves are stored by savePictures().
    		// NOTE(review): the URL is "test/<name>" while the files are written
    		// directly into D:/test/ - confirm this matches how the HTML is served.
    		wordToHtmlConverter.setPicturesManager(new PicturesManager() {
    			public String savePicture(byte[] content, PictureType pictureType,
    					String suggestedName, float widthInches, float heightInches) {
    				return "test/" + suggestedName;
    			}
    		});
    		wordToHtmlConverter.processDocument(wordDocument);
    
    		savePictures(wordDocument);
    
    		// Serialize the HTML DOM into an in-memory buffer.
    		Document htmlDocument = wordToHtmlConverter.getDocument();
    		ByteArrayOutputStream out = new ByteArrayOutputStream();
    		Transformer serializer = TransformerFactory.newInstance()
    				.newTransformer();
    		serializer.setOutputProperty(OutputKeys.ENCODING, ENCODING);
    		serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    		serializer.setOutputProperty(OutputKeys.METHOD, "HTML");
    		serializer.transform(new DOMSource(htmlDocument),
    				new StreamResult(out));
    
    		// Decode with the charset the serializer wrote, not with the
    		// platform default, so the text survives on every machine.
    		writeFile(out.toString(ENCODING), HTML_PATH);
    
    		return HTML_PATH;
    	}
    
    	/**
    	 * Writes every picture embedded in the document to the picture
    	 * directory. A picture whose target file cannot be opened is reported
    	 * on stderr and skipped.
    	 * 
    	 * @param wordDocument
    	 *            the document whose pictures are extracted
    	 * @throws IOException
    	 *             if writing or closing a picture file fails
    	 */
    	private static void savePictures(HWPFDocument wordDocument)
    			throws IOException {
    		List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
    		if (pics == null) {
    			return;
    		}
    		for (Picture pic : pics) {
    			FileOutputStream picOut = null;
    			try {
    				picOut = new FileOutputStream(PICTURE_DIR
    						+ pic.suggestFullFileName());
    				pic.writeImageContent(picOut);
    			} catch (FileNotFoundException e) {
    				e.printStackTrace();
    			} finally {
    				// Release the handle even when writing fails.
    				if (picOut != null) {
    					picOut.close();
    				}
    			}
    		}
    	}
    
    	/**
    	 * Normalizes the HTML with jsoup and writes it to a file encoded as
    	 * GB2312.
    	 * <p>
    	 * This method never throws for I/O problems: failures are printed to
    	 * stderr, so the file may be missing or incomplete afterwards.
    	 * 
    	 * @param content
    	 *            the HTML markup to write
    	 * @param path
    	 *            the path of the file to create or overwrite
    	 */
    	public static void writeFile(String content, String path) {
    		// Round-trip through jsoup to get well-formed, normalized markup.
    		org.jsoup.nodes.Document doc = Jsoup.parse(content);
    		content = doc.html();
    
    		FileOutputStream fos = null;
    		BufferedWriter bw = null;
    		try {
    			fos = new FileOutputStream(new File(path));
    			bw = new BufferedWriter(new OutputStreamWriter(fos, ENCODING));
    			bw.write(content);
    			// Flush here so a failed write is reported by the catch below
    			// instead of surfacing only while closing.
    			bw.flush();
    		} catch (FileNotFoundException fnfe) {
    			fnfe.printStackTrace();
    		} catch (IOException ioe) {
    			ioe.printStackTrace();
    		} finally {
    			// Close both independently: the writer may never have been
    			// created, and a failing close must not skip the other one.
    			closeAndReport(bw);
    			closeAndReport(fos);
    		}
    	}
    
    	/**
    	 * Closes the resource if it is not {@code null} and reports a failure
    	 * on stderr instead of throwing.
    	 * 
    	 * @param resource
    	 *            the resource to close, may be {@code null}
    	 */
    	private static void closeAndReport(java.io.Closeable resource) {
    		if (resource == null) {
    			return;
    		}
    		try {
    			resource.close();
    		} catch (IOException ioe) {
    			ioe.printStackTrace();
    		}
    	}
    }
    

     注意:上面的代码编码必须用GB2312,改用UTF-8会出现乱码。乱码通常是因为字节与字符串互相转换时使用的编码(例如平台默认编码)与序列化、写文件时指定的编码不一致;只要序列化、解码、写文件三处显式使用同一种编码,就不会出现乱码。

     

    本站文章为 宝宝巴士 SD.Team 原创,转载务必在明显处注明:(作者官方网站: 宝宝巴士)

    转载自【宝宝巴士SuperDo团队】 原文链接: http://www.cnblogs.com/superdo/p/4893022.html

  • 相关阅读:
    [Python] Array Attributes of Numpy lib
    《火球——UML大战需求分析》(第2章 耗尽脑汁的需求分析工作)——2.1 需求分析面面观
    UVA 10201 Adventures in Moving
    《史蒂夫·乔布斯传》官方正式中文版电子书(高清晰完整版)
    为什么要用BitSet
    sed 技巧一例:特定位置插入
    Mac+IPAD上使用wireshark抓包
    【经验谈】XmlSerializer的坑
    HTML语言简单回顾
    不可思议的每日培训(1)——日复一日的每日分享
  • 原文地址:https://www.cnblogs.com/superdo/p/4893022.html
Copyright © 2011-2022 走看看