zoukankan      html  css  js  c++  java
  • poi解析word文档转换成html(包括图片解析)

    需求:将本地上传的word文档解析并放入数据库中

    代码:

    import java.io.ByteArrayOutputStream;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.IOException;
    import java.io.InputStream;
    import java.nio.charset.StandardCharsets;
    import java.util.List;

    import javax.xml.parsers.DocumentBuilderFactory;
    import javax.xml.transform.OutputKeys;
    import javax.xml.transform.Transformer;
    import javax.xml.transform.TransformerFactory;
    import javax.xml.transform.dom.DOMSource;
    import javax.xml.transform.stream.StreamResult;

    import org.apache.poi.hwpf.HWPFDocument;
    import org.apache.poi.hwpf.converter.PicturesManager;
    import org.apache.poi.hwpf.converter.WordToHtmlConverter;
    import org.apache.poi.hwpf.usermodel.Picture;
    import org.apache.poi.hwpf.usermodel.PictureType;
    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.stereotype.Service;
    import org.springframework.web.multipart.MultipartFile;
    import org.w3c.dom.Document;

    import com.google.common.io.Files;
    import com.zhaozhi.writing.service.service.FileService;
    import com.zhaozhi.writing.service.service.WordParseService;
    import com.zhaozhi.writing.service.util.FileUtil;

    /**
     * Converts an uploaded legacy Word document into an HTML string using Apache POI's
     * {@link WordToHtmlConverter}.
     *
     * <p>Embedded pictures are written to the JVM temp directory, handed to
     * {@link FileService#upload}, and referenced from the generated HTML by a URL of the
     * form {@code FileUtil.OSS_DOMAIN + "/" + <millis>_<picture file name>}.
     */
    @Service
    public class WordParseServiceImpl implements WordParseService {

        @Autowired
        private FileService fileService;

        /**
         * Parses the uploaded Word file and returns its HTML rendering.
         *
         * @param file the uploaded Word document
         * @return the HTML markup produced by the converter, decoded as UTF-8
         * @throws Exception if the document cannot be read, converted, or serialized
         */
        @Override
        public String docToHtmlResult(MultipartFile file) throws Exception {
            HWPFDocument wordDocument = readDocument(file);

            // One prefix per call: it is used both for the uploaded file names and for the
            // picture URLs written into the HTML, so the two must be built from the same value.
            final String namePrefix = System.currentTimeMillis() + "_";

            Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
            WordToHtmlConverter converter = new WordToHtmlConverter(document);
            // Tell the converter which URL to emit for each picture it encounters.
            converter.setPicturesManager(new PicturesManager() {
                @Override
                public String savePicture(byte[] content, PictureType pictureType, String suggestedName,
                        float widthInches, float heightInches) {
                    // NOTE(review): presumably suggestedName equals Picture.suggestFullFileName(),
                    // which is what the upload step uses -- confirm for the POI version in use.
                    return FileUtil.OSS_DOMAIN + "/" + namePrefix + suggestedName;
                }
            });

            uploadPictures(wordDocument, namePrefix);

            converter.processDocument(wordDocument);
            return serializeAsHtml(converter.getDocument());
        }

        /** Loads the Word document from the upload, closing the upload stream afterwards. */
        private static HWPFDocument readDocument(MultipartFile file) throws IOException {
            try (InputStream in = file.getInputStream()) {
                return new HWPFDocument(in);
            }
        }

        /**
         * Writes every embedded picture to a temp file named {@code namePrefix + fileName}
         * and passes that file to {@link FileService#upload}.
         *
         * <p>I/O failures are reported and skipped so that one bad picture does not abort
         * the whole conversion (same best-effort behavior as before).
         */
        private void uploadPictures(HWPFDocument wordDocument, String namePrefix) throws Exception {
            List<Picture> pictures = wordDocument.getPicturesTable().getAllPictures();
            if (pictures == null) {
                return;
            }
            File tmpDir = new File(System.getProperty("java.io.tmpdir"));
            for (Picture picture : pictures) {
                File picFile = new File(tmpDir, namePrefix + picture.suggestFullFileName());
                try {
                    Files.write(picture.getContent(), picFile);
                    fileService.upload(picFile);
                    // NOTE(review): the temp file is never deleted. Delete it here once it is
                    // confirmed that FileService.upload has finished with the file on return.
                } catch (IOException e) {
                    // FileNotFoundException is an IOException, so a single catch covers both.
                    System.err.println("Failed to store or upload picture " + picFile.getName());
                    e.printStackTrace();
                }
            }
        }

        /** Serializes the converter's DOM as indented HTML and returns it as a string. */
        private static String serializeAsHtml(Document htmlDocument) throws Exception {
            ByteArrayOutputStream out = new ByteArrayOutputStream();

            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

            // Decode with the same charset the serializer was told to write; the platform
            // default charset would corrupt non-ASCII text on JVMs that are not UTF-8.
            return new String(out.toByteArray(), StandardCharsets.UTF_8);
        }

    }

    通过poi包中的WordToHtmlConverter类实现word与html的转换;

    关于图片的解析思路:先将word文档中的图片抽取出来,把图片内容写入系统临时目录(java.io.tmpdir,在tomcat下运行时即tomcat的temp目录)中的临时文件(详见代码):

    // Build the temp-file path: <java.io.tmpdir>/<currentTime>_<file name suggested by the picture>.
    String path = System.getProperty("java.io.tmpdir")+"/"+currentTime+"_"+pic.suggestFullFileName();
    File picFile = new File(path);
    // Write the picture's bytes to that file (Files is Guava's com.google.common.io.Files, per the imports in the full listing).
    Files.write(pic.getContent(),picFile );

    再将临时文件上传到阿里云服务器上生成图片url,最后通过PicturesManager的savePicture方法返回与上传文件同名的url地址(OSS域名 + "/" + currentTime + "_" + 图片文件名),由转换器写入html中进行展示即可;

  • 相关阅读:
    JS 心得总结
    zk 隐藏网页文件后缀
    zk label控件内容换行
    ZK 代码自动提示
    zk 获取session,request,servletContext,response
    zk回车事件
    zk jquery的使用
    zk listbox 点击列标题实现排序功能
    Django框架学习----视图与模板(最新文章实现)
    Django框架学习----视图与模板(分页功能)
  • 原文地址:https://www.cnblogs.com/yzf666/p/6958012.html
Copyright © 2011-2022 走看看