zoukankan      html  css  js  c++  java
  • C# .NET 使用 OpenXML 提取 PPT 中的音频、视频、图片、文本

    C# .NET 使用 OpenXML 提取 PPT 中的音频、视频、图片、文本

    名称空间:

    using System;
    using DocumentFormat.OpenXml.Packaging;
    using System.IO;
    using System.Linq;
    using DocumentFormat.OpenXml;
    using DocumentFormat.OpenXml.Presentation;
    using A = DocumentFormat.OpenXml.Drawing;
    using P14 = DocumentFormat.OpenXml.Office2010.PowerPoint;
    

      

    代码如下:

      

            /// <summary>
            /// Walks every slide of a presentation and shows how to locate its audio
            /// clips, plain pictures and text bodies with the Open XML SDK.
            /// </summary>
            /// <remarks>
            /// This is sample code: the extracted values are only stored in locals so that
            /// callers can copy the relevant lookup into their own code. Streams are opened
            /// inside <c>using</c> blocks; consume them there.
            /// To process video instead of audio, iterate <c>videoList</c> and look for
            /// <c>A.VideoFromFile</c> instead of <c>A.AudioFromFile</c>.
            /// </remarks>
            /// <param name="path">Path of the presentation file; it is opened read-only.</param>
            public void PptInfo(string path)
            {
                using (var doc = PresentationDocument.Open(path, false))
                {
                    var presentationPart = doc.PresentationPart;
                    var presentation = presentationPart?.Presentation;
                    if (presentation?.SlideIdList == null)
                    {
                        return;
                    }

                    // Number of the first slide; 1 when the attribute is absent.
                    // Computed once so the fallback applies to the base only, not to "1 + index".
                    int firstSlideNumber = presentation.FirstSlideNum?.Value ?? 1;
                    int slideIndex = -1;

                    foreach (SlideId slideId in presentation.SlideIdList.Elements<SlideId>())
                    {
                        // Count every entry (even skipped ones) so numbering follows list position.
                        slideIndex++;

                        string slideRelId = slideId.RelationshipId?.Value;
                        if (slideRelId == null)
                        {
                            continue;
                        }

                        SlidePart slidePart = presentationPart.GetPartById(slideRelId) as SlidePart;
                        if (slidePart == null || slidePart.Slide == null)
                        {
                            continue;
                        }

                        // Slide number derived from the first-slide number plus list position.
                        int slideNumber = firstSlideNumber + slideIndex;

                        Slide slide = slidePart.Slide;
                        var shapeTree = slide.CommonSlideData?.ShapeTree;
                        if (shapeTree == null)
                        {
                            continue;
                        }

                        // Audio timing nodes
                        var audioList = slide.Descendants<Audio>();
                        // Video timing nodes (iterate these instead of audioList to extract video)
                        var videoList = slide.Descendants<Video>();
                        // Pictures whose application non-visual properties element has no children
                        // (the media handling below reads AudioFromFile from that element).
                        var picList = shapeTree.Descendants<Picture>().Where(o =>
                        {
                            var appProps = o.NonVisualPictureProperties?.ApplicationNonVisualDrawingProperties;
                            return appProps == null || !appProps.HasChildren;
                        });
                        // Text bodies
                        var txBodyList = shapeTree.Descendants<TextBody>();

                        // Extract audio
                        foreach (var media in audioList)
                        {
                            // Shape the audio node targets
                            var spTgt = media.CommonMediaNode?.TargetElement?.ShapeTarget;
                            string targetShapeId = spTgt?.ShapeId?.InnerText;
                            if (targetShapeId == null)
                            {
                                continue;
                            }

                            // Non-visual drawing properties of that shape. The ids are compared as
                            // text because the two attributes are exposed through different SDK
                            // value wrapper types.
                            var cNvPr = slide.Descendants<NonVisualDrawingProperties>()
                                .FirstOrDefault(o => string.Equals(o.Id?.InnerText, targetShapeId, StringComparison.Ordinal));

                            // Parent and grandparent; skip targets that are not pictures.
                            var nvPicPr = cNvPr?.Parent as NonVisualPictureProperties;
                            var pic = nvPicPr?.Parent as Picture;
                            if (pic == null)
                            {
                                continue;
                            }

                            // Shape information
                            var shapeId = cNvPr.Id?.Value;
                            var shapeName = cNvPr.Name?.Value;
                            var shapeDescr = cNvPr.Description?.Value;

                            // Audio file reference (use A.VideoFromFile for video)
                            var audioFile = nvPicPr.ApplicationNonVisualDrawingProperties?
                                .Elements<A.AudioFromFile>().FirstOrDefault();
                            string linkId = audioFile?.Link?.Value;

                            // Resolve the media location: external relationship first, then data part.
                            Uri uri = null;
                            if (linkId != null)
                            {
                                uri = slidePart.ExternalRelationships.FirstOrDefault(o => o.Id == linkId)?.Uri;
                                if (uri == null)
                                {
                                    var dataRel = slidePart.DataPartReferenceRelationships.FirstOrDefault(o => o.Id == linkId);
                                    if (dataRel != null)
                                    {
                                        uri = dataRel.Uri;
                                        using (Stream mediaStream = dataRel.DataPart.GetStream())
                                        {
                                            // Read or copy the media bytes here.
                                        }
                                    }
                                }
                            }

                            // Image attached to the media shape; Embed is absent for linked images.
                            string posterEmbed = pic.BlipFill?.Blip?.Embed?.Value;
                            if (posterEmbed != null)
                            {
                                using (Stream imgStream = slidePart.GetPartById(posterEmbed).GetStream())
                                {
                                    // Read or copy the image bytes here.
                                }
                            }
                        }

                        // Extract pictures
                        foreach (var pic in picList)
                        {
                            var cNvPr = pic.NonVisualPictureProperties?.NonVisualDrawingProperties;

                            // Shape information
                            var shapeId = cNvPr?.Id?.Value;
                            var shapeName = cNvPr?.Name?.Value;
                            var shapeDescr = cNvPr?.Description?.Value;

                            // Image data; Embed is absent for linked images.
                            string embed = pic.BlipFill?.Blip?.Embed?.Value;
                            if (embed == null)
                            {
                                continue;
                            }

                            using (Stream imgStream = slidePart.GetPartById(embed).GetStream())
                            {
                                // Read or copy the image bytes here.
                            }
                        }

                        // Extract text
                        foreach (var txBody in txBodyList)
                        {
                            // Parent element; skip text bodies that do not sit directly in a shape.
                            var sp = txBody.Parent as Shape;
                            if (sp == null)
                            {
                                continue;
                            }

                            // Shape properties
                            var cNvPr = sp.NonVisualShapeProperties?.NonVisualDrawingProperties;

                            // Shape information
                            var shapeId = cNvPr?.Id?.Value;
                            var shapeName = cNvPr?.Name?.Value;

                            // Option 1: all text of the body as one string
                            var flatText = txBody.InnerText;
                            // Option 2: concatenate the individual text runs
                            var runText = string.Concat(txBody.Descendants<A.Text>().Select(o => o.Text));
                            // Option 3: one line per paragraph
                            var paragraphText = string.Join(
                                Environment.NewLine,
                                txBody.Descendants<A.Paragraph>().Select(o => o.InnerText));
                        }
                    }
                }
            }
    

      

    ppt文档的形状结构大概为:

    完毕

  • 相关阅读:
    Flume入门与进阶
    git如何忽略已经加入版本控制的文件
    Redis常用命令
    如何在宝塔面板上添加创建一个定时任务
    PHP代码篇(九)PHP接口开发如何使用JWT进行验证身份
    七. Go并发编程--sync.Once
    六. Go并发编程--WaitGroup
    5. Go 并发编程--sync/atomic
    4. Go并发编程--Mutex/RWMutex
    docker内服务访问宿主机服务
  • 原文地址:https://www.cnblogs.com/ping9719/p/13497923.html
Copyright © 2011-2022 走看看