zoukankan      html  css  js  c++  java
  • poi解析word文档转换成html(包括图片解析)

    需求:将本地上传的word文档解析并放入数据库中

    代码:

    import java.io.ByteArrayOutputStream;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.IOException;
    import java.io.InputStream;
    import java.nio.charset.StandardCharsets;
    import java.util.List;

    import javax.xml.parsers.DocumentBuilderFactory;
    import javax.xml.transform.OutputKeys;
    import javax.xml.transform.Transformer;
    import javax.xml.transform.TransformerException;
    import javax.xml.transform.TransformerFactory;
    import javax.xml.transform.dom.DOMSource;
    import javax.xml.transform.stream.StreamResult;

    import org.apache.poi.hwpf.HWPFDocument;
    import org.apache.poi.hwpf.converter.PicturesManager;
    import org.apache.poi.hwpf.converter.WordToHtmlConverter;
    import org.apache.poi.hwpf.usermodel.Picture;
    import org.apache.poi.hwpf.usermodel.PictureType;
    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.stereotype.Service;
    import org.springframework.web.multipart.MultipartFile;
    import org.w3c.dom.Document;

    import com.google.common.io.Files;
    import com.zhaozhi.writing.service.service.FileService;
    import com.zhaozhi.writing.service.service.WordParseService;
    import com.zhaozhi.writing.service.util.FileUtil;

    /**
     * {@link WordParseService} implementation that converts an uploaded legacy Word
     * ({@code .doc}) document to an HTML string with Apache POI's
     * {@link WordToHtmlConverter}.
     *
     * <p>Pictures embedded in the document are written to the JVM temp directory,
     * passed to {@code fileService.upload(File)}, and referenced from the generated
     * HTML by a URL of the form
     * {@code FileUtil.OSS_DOMAIN + "/" + <timestamp> + "_" + <picture name>}.
     */
    @Service
    public class WordParseServiceImpl implements WordParseService {

        /** Output encoding requested from the XSLT serializer; the result is decoded as UTF-8. */
        private static final String OUTPUT_ENCODING = "utf-8";

        @Autowired
        private FileService fileService;

        /**
         * Converts the uploaded Word document to HTML.
         *
         * @param file the uploaded {@code .doc} file
         * @return the HTML markup produced by the converter, decoded as UTF-8
         * @throws Exception if the upload cannot be read, the document cannot be parsed,
         *                   or the HTML cannot be serialized; failures while storing an
         *                   individual picture with an {@link IOException} are reported
         *                   on stderr and do not abort the conversion
         */
        @Override
        public String docToHtmlResult(MultipartFile file) throws Exception {
            HWPFDocument wordDocument = readDocument(file);

            Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);

            // A single timestamp per conversion: it prefixes both the uploaded file names
            // and the image URLs written into the HTML, so the two always match.
            // NOTE(review): two conversions started in the same millisecond would share
            // this prefix — confirm whether that matters for the upload target.
            final long currentTime = System.currentTimeMillis();

            // Tell the converter which URL to emit for each picture it encounters.
            wordToHtmlConverter.setPicturesManager(new PicturesManager() {
                @Override
                public String savePicture(byte[] content, PictureType pictureType, String suggestedName,
                        float widthInches, float heightInches) {
                    return FileUtil.OSS_DOMAIN + "/" + currentTime + "_" + suggestedName;
                }
            });

            uploadPictures(wordDocument, currentTime);

            wordToHtmlConverter.processDocument(wordDocument);
            return serializeToHtml(wordToHtmlConverter.getDocument());
        }

        /** Parses the upload into a POI document and always closes the upload's input stream. */
        private static HWPFDocument readDocument(MultipartFile file) throws IOException {
            try (InputStream in = file.getInputStream()) {
                return new HWPFDocument(in);
            }
        }

        /**
         * Writes every picture of the document to a temp file named
         * {@code <currentTime>_<suggested file name>} and hands it to the file service.
         *
         * <p>Declared with {@code throws Exception} because the checked exceptions of
         * {@code fileService.upload} are not visible from this class.
         */
        private void uploadPictures(HWPFDocument wordDocument, long currentTime) throws Exception {
            List<Picture> pictures = wordDocument.getPicturesTable().getAllPictures();
            if (pictures == null) {
                return;
            }
            String tempDir = System.getProperty("java.io.tmpdir");
            for (Picture picture : pictures) {
                String fileName = currentTime + "_" + picture.suggestFullFileName();
                File pictureFile = new File(tempDir, fileName);
                try {
                    Files.write(picture.getContent(), pictureFile);
                    fileService.upload(pictureFile);
                    // NOTE(review): the temp file is left on disk, as before. Delete it here
                    // once it is confirmed that upload() has finished with the file on return.
                } catch (IOException e) {
                    // Best effort, as in the original: a picture that cannot be stored must not
                    // abort the conversion; the HTML will simply contain a dangling image URL.
                    System.err.println("Failed to store picture " + fileName);
                    e.printStackTrace();
                }
            }
        }

        /** Serializes the converter's DOM as indented HTML and returns it as a string. */
        private static String serializeToHtml(Document htmlDocument) throws TransformerException {
            ByteArrayOutputStream out = new ByteArrayOutputStream();

            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, OUTPUT_ENCODING);
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

            // Decode with the same charset the serializer was told to write; relying on the
            // platform default corrupts non-ASCII text on hosts whose default is not UTF-8.
            return new String(out.toByteArray(), StandardCharsets.UTF_8);
        }

    }

    通过 Apache POI 中的 WordToHtmlConverter 类实现 Word(.doc)文档到 HTML 的转换;

    关于图片的解析思路:先将 Word 文档中的图片抽取出来,把图片内容写入 java.io.tmpdir 所指向的临时目录(部署在 Tomcat 下时通常是其 temp 目录)中的临时文件(详见代码):

    // Build the temp-file path: <java.io.tmpdir>/<currentTime>_<file name POI suggests for the picture>.
    String path = System.getProperty("java.io.tmpdir")+"/"+currentTime+"_"+pic.suggestFullFileName();
    File picFile = new File(path);
    // Write the picture's raw bytes to that file so it can be handed to the upload service.
    Files.write(pic.getContent(),picFile );

    再将临时文件上传到阿里云服务器上生成图片url,最后将url地址放到html中进行展示即可;

  • 相关阅读:
    springBoot启动异常 Failed to load ApplicationContext
    mysql存储过程
    springBoot集成Swagger
    groupmems命令:更改和查看组成员 和 usermod命令修改组
    javaBean简介
    Angular获取dom元素,以及父子组建之间相互传值
    Lambda表达式
    坐标转换
    扩展方法
    Binding的Path(路径)
  • 原文地址:https://www.cnblogs.com/yzf666/p/6958012.html
Copyright © 2011-2022 走看看