zoukankan html css js c++ java

最近一段时间全在导入，excel, word 还有bulabula。。。。。

　　导入excel已经日常化了，经常没事弄一个，但是word确实还是第一次弄，于是弄了个记录

贴一段代码

解析word首先要做的是用压缩软件解压word，你会得到一个包含document.xml的文件夹，然后研究下这个xml文件，配合我上一篇博客，就该懂了。但是，本文有个缺陷：word导入时公式无法确定位置，如有好的方法，也请告诉我

先上pom

<!-- EMF to PNG conversion -->
    <!-- NOTE(review): ooxml-schemas presumably supplies the CT* schema classes used by the parsing code; confirm -->
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>ooxml-schemas</artifactId>
      <version>1.3</version>
    </dependency>
    <dependency>
      <groupId>org.freehep</groupId>
      <artifactId>freehep-graphicsio-emf</artifactId>
      <version>2.1.3</version>
    </dependency>
    <dependency>
      <groupId>org.freehep</groupId>
      <artifactId>freehep-io</artifactId>
      <version>2.0.5</version>
    </dependency>
    <!-- WMF to SVG to PNG conversion -->
    <dependency>
      <groupId>xml-apis</groupId>
      <artifactId>xml-apis-ext</artifactId>
      <version>1.3.04</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.xmlgraphics/batik-codec -->
    <dependency>
      <groupId>org.apache.xmlgraphics</groupId>
      <artifactId>batik-codec</artifactId>
      <version>1.7</version>
    </dependency>
    <dependency>
      <groupId>net.arnx</groupId>
      <artifactId>wmf2svg</artifactId>
      <version>0.9.8</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/dom4j/dom4j -->
    <dependency>
      <groupId>dom4j</groupId>
      <artifactId>dom4j</artifactId>
      <version>1.6.1</version>
    </dependency>

再上关键代码

以下代码为读取一个段落的text，其中的保存图片已根据自己项目改掉，有要复用的，请自行修改

/**
 * Converts one paragraph of a .docx document into a text/HTML fragment.
 *
 * <p>The direct children of every run are visited in document order: text nodes
 * ({@code w:t}) are appended verbatim, and every picture found in an inline
 * {@code w:drawing}, a {@code w:object} or a {@code w:pict} element is handed to
 * {@code saveWordPic}, whose returned markup is appended in place.
 *
 * @param docx       document that owns the paragraph; used to resolve picture relationship ids
 * @param paragraph  paragraph to convert
 * @param path       passed through to {@code saveWordPic}
 * @param targetPath passed through to {@code saveWordPic}
 * @param request    passed through to {@code saveWordPic}
 * @param response   passed through to {@code saveWordPic}
 * @return the concatenated text and picture markup, or {@code null} when the paragraph
 *         contains Office Math (formula import is not implemented yet)
 * @throws Exception if saving a picture fails
 */
public  String readImageInParagraph(XWPFDocument docx, XWPFParagraph paragraph, String path, String targetPath,HttpServletRequest request, HttpServletResponse response) throws Exception {
        // TODO formula import: paragraphs containing formulas are reported to the caller as null.
        if (!CollectionUtils.isEmpty(paragraph.getCTP().getOMathList())){
            return null;
        }

        StringBuilder sb = new StringBuilder();
        for (XWPFRun run : paragraph.getRuns()) {
            // XWPFRun is POI's own view of the run; the underlying CTR bean is needed to walk the raw XML children.
            XmlCursor c = run.getCTR().newCursor();
            try {
                // Select every direct child element of the run.
                c.selectPath("./*");
                while (c.toNextSelection()) {
                    XmlObject o = c.getObject();
                    if (o instanceof CTDrawing) {
                        // <w:drawing>
                        sb.append(saveInlineDrawingImages((CTDrawing) o, docx, path, targetPath, request, response));
                    } else if (o instanceof CTObject
                            || o instanceof org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPicture) {
                        // <w:object> and <w:pict> both wrap VML shapes.
                        sb.append(saveVmlShapeImages(o, docx, path, targetPath, request, response));
                    } else if (o instanceof CTText) {
                        String value = ((CTText) o).getStringValue();
                        if (StringUtil.isNotEmpty(value)){
                            sb.append(value);
                        }
                    }
                }
            } finally {
                // Cursors must be released explicitly once they are no longer needed.
                c.dispose();
            }
        }
        return sb.toString();
    }

    /** Saves every {@code pic:pic} picture of the drawing's inline graphics and returns the joined markup. */
    private String saveInlineDrawingImages(CTDrawing drawing, XWPFDocument docx, String path, String targetPath, HttpServletRequest request, HttpServletResponse response) throws Exception {
        StringBuilder html = new StringBuilder();
        for (CTInline ctInline : drawing.getInlineArray()) {
            CTGraphicalObject graphic = ctInline.getGraphic();
            XmlCursor cursor = graphic.getGraphicData().newCursor();
            try {
                cursor.selectPath("./*");
                while (cursor.toNextSelection()) {
                    XmlObject xmlObject = cursor.getObject();
                    // Fully qualified on purpose: wordprocessingml declares a different CTPicture type.
                    if (!(xmlObject instanceof org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture)) {
                        continue;
                    }
                    org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture picture = (org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture) xmlObject;
                    if (picture.getBlipFill() == null || picture.getBlipFill().getBlip() == null) {
                        continue;
                    }
                    html.append(savePictureByRelationId(docx, picture.getBlipFill().getBlip().getEmbed(), path, targetPath, request, response));
                }
            } finally {
                cursor.dispose();
            }
        }
        return html.toString();
    }

    /** Saves the first image of every VML shape directly below {@code container} and returns the joined markup. */
    private String saveVmlShapeImages(XmlObject container, XWPFDocument docx, String path, String targetPath, HttpServletRequest request, HttpServletResponse response) throws Exception {
        StringBuilder html = new StringBuilder();
        XmlCursor cursor = container.newCursor();
        try {
            cursor.selectPath("./*");
            while (cursor.toNextSelection()) {
                XmlObject xmlObject = cursor.getObject();
                if (!(xmlObject instanceof CTShape)) {
                    continue;
                }
                CTShape shape = (CTShape) xmlObject;
                // A shape without <v:imagedata> carries no picture; indexing element 0 would throw.
                if (shape.sizeOfImagedataArray() == 0) {
                    continue;
                }
                html.append(savePictureByRelationId(docx, shape.getImagedataArray(0).getId2(), path, targetPath, request, response));
            }
        } finally {
            cursor.dispose();
        }
        return html.toString();
    }

    /** Resolves a relationship id to embedded picture data and saves it; returns "" when nothing is embedded under that id. */
    private String savePictureByRelationId(XWPFDocument docx, String relationId, String path, String targetPath, HttpServletRequest request, HttpServletResponse response) throws Exception {
        if (relationId == null) {
            return "";
        }
        XWPFPictureData pictureData = docx.getPictureDataByID(relationId);
        if (pictureData == null) {
            // The id does not point at an embedded picture part (for example a linked image).
            return "";
        }
        return saveWordPic(pictureData, path, targetPath, request, response);
    }

saveWordPic  该方法将读取到的图片全部转为png

private String saveWordPic(XWPFPictureData pictureData, String path, String targetPath, HttpServletRequest request, HttpServletResponse response) throws Exception {
//        System.err.println(WebPathHelper.getUploadPath());
        StringBuilder sb = new StringBuilder("");
        byte[] data = pictureData.getData();
        String description = pictureData.getFileName();
        targetPath = path + File.separator + pictureData.getFileName();
        // 如果文件名是emf或者wmf,转换格式
        if(description.endsWith("emf") || description.endsWith("EMF")){
            EMFInputStream emfInputStream = new EMFInputStream(new ByteArrayInputStream(data));
            EMFRenderer emfRenderer = new EMFRenderer(emfInputStream);
            int width = (int)emfInputStream.readHeader().getBounds().getWidth();
            int height = (int)emfInputStream.readHeader().getBounds().getHeight();
//            System.out.println("widht = " + width + " and height = " + height);
            BufferedImage result = new BufferedImage(width+100, height+100, BufferedImage.TYPE_INT_ARGB);
            Graphics2D g2 = (Graphics2D)result.createGraphics();
            g2.setFont(new java.awt.Font("宋体",  java.awt.Font.BOLD, 20));
            response.setHeader("ContentType","image/png");
            emfRenderer.paint(g2);
            String png = replaceFileName(targetPath,"png");
            ImageIO.write(result, "png", new File(png));
            png = getReturnPath(png,request);
            sb.append("<img src="").append(png).append(""").append(">");

        }else if (description.endsWith("wmf") || description.endsWith("WMF")){
//              将wmf转换为svg
            String svg = replaceFileName(targetPath, "svg");
            convert(targetPath, svg);
            String name = convert2PNG(svg);
            name = getReturnPath(name,request);
            sb.append("<img src="").append(name).append(""").append(">");
        }else if (description.endsWith("png") || description.endsWith("jpeg") || description.endsWith("jpg")){
　　　　　　　　//  其实这里可以加各种类型，就是没
            String name = getReturnPath(PathUtil.join(path, description),request);
            sb.append("<img src="").append(name).append(""").append(">");
        }
        targetPath = PathUtil.join(path, description);
        File targetFile = new File(targetPath);
        targetFile.createNewFile();
        FileOutputStream out = new FileOutputStream(targetFile);
        out.write(data);
        out.close();
        return sb.toString();

剩下的都是自己的业务逻辑。。。请自行脑补，

接下来导入一个zip包。。。我是跟导入这个恶心的事情干上了么。。。哎，想换地方了

如有错误，请邮件zs253499660@sina.com,如有更好的方法，可以推荐

查看全文

相关阅读:
sql server 操作文件
 sql server T-sql查询执行顺序
 js 时间相关函数
 js页面：函数名 is not defined
C# 通过文件路径获取文件名
 WRAR下载及注册
 Java 中xml解析
 string 与 byte[] 互转时的注意事项
 Spring MVC表单标签
 java 中基本类型与字符串之间的互相转换

原文地址：https://www.cnblogs.com/senjiang/p/13964669.html