zoukankan      html  css  js  c++  java
  • C# Net 使用 openxml 提取ppt中的音频、视频、图片、文本

    C# Net 使用 openxml 提取ppt中的音频、视频、图片、文本

    命名空间:

    using System;
    using DocumentFormat.OpenXml.Packaging;
    using System.IO;
    using System.Linq;
    using DocumentFormat.OpenXml;
    using DocumentFormat.OpenXml.Presentation;
    using A = DocumentFormat.OpenXml.Drawing;
    using P14 = DocumentFormat.OpenXml.Office2010.PowerPoint;
    

      

    代码如下:

      

            /// <summary>
            /// Opens the presentation at <paramref name="path"/> read-only and walks every slide,
            /// locating its audio/video media, its plain pictures and its text bodies.
            /// </summary>
            /// <remarks>
            /// This is sample code: the extracted values (shape id/name/description, media URI,
            /// streams, text) are only held in locals. Replace the marked spots with real handling.
            /// Elements that lack the expected structure are skipped instead of throwing.
            /// </remarks>
            /// <param name="path">Path of the presentation file to read.</param>
            public void PptInfo(string path)
            {
                using (var doc = PresentationDocument.Open(path, false))
                {
                    var presentationPart = doc.PresentationPart;
                    var presentation = presentationPart?.Presentation;
                    if (presentation?.SlideIdList == null)
                    {
                        return;
                    }

                    // Materialize once so the slide position is the loop index (no per-slide IndexOf scan).
                    var slideIds = presentation.SlideIdList.Elements<SlideId>().ToList();
                    int firstSlideNumber = presentation.FirstSlideNum?.Value ?? 1;

                    for (int slideIndex = 0; slideIndex < slideIds.Count; slideIndex++)
                    {
                        var slidePart = presentationPart.GetPartById(slideIds[slideIndex].RelationshipId) as SlidePart;
                        if (slidePart == null || slidePart.Slide == null)
                        {
                            continue;
                        }

                        // Slide number as displayed in PowerPoint. The offset is added to the resolved
                        // first-slide number; "a ?? 1 + i" would parse as "a ?? (1 + i)" and give every
                        // slide the same number whenever FirstSlideNum is set.
                        var SlideNumber = firstSlideNumber + slideIndex;

                        Slide slide = slidePart.Slide;
                        var shapeTree = slide.CommonSlideData?.ShapeTree;
                        if (shapeTree == null)
                        {
                            continue;
                        }

                        // Audio and video timing nodes both carry a CommonMediaNode that targets a shape,
                        // so they are processed by the same helper.
                        var mediaNodes = slide.Descendants<Audio>().Select(a => a.CommonMediaNode)
                            .Concat(slide.Descendants<Video>().Select(v => v.CommonMediaNode));
                        foreach (var mediaNode in mediaNodes)
                        {
                            ExtractMedia(slidePart, slide, mediaNode);
                        }

                        // Pictures whose application non-visual properties have no children, i.e. pictures
                        // that are not media poster frames.
                        // NOTE(review): this also skips pictures carrying any other child there
                        // (e.g. a placeholder) - confirm that is intended.
                        var picList = shapeTree.Descendants<Picture>()
                            .Where(o => !(o.NonVisualPictureProperties?.ApplicationNonVisualDrawingProperties?.Any() ?? false));
                        foreach (var pic in picList)
                        {
                            var cNvPr = pic.NonVisualPictureProperties?.NonVisualDrawingProperties;

                            // Shape information
                            var ShapeId = cNvPr?.Id?.Value;
                            var ShapeName = cNvPr?.Name?.Value;
                            var ShapeDescr = cNvPr?.Description?.Value;

                            // Embedded image; linked pictures have no Embed id and are skipped.
                            var embed = pic.BlipFill?.Blip?.Embed?.Value;
                            if (string.IsNullOrEmpty(embed))
                            {
                                continue;
                            }

                            var part = slidePart.GetPartById(embed);
                            using (Stream imgStream = part.GetStream())
                            {
                                // Consume the image stream here (e.g. copy it to a file).
                            }
                        }

                        // Text bodies
                        foreach (var txBody in shapeTree.Descendants<TextBody>())
                        {
                            // The text body is expected to sit directly inside a shape.
                            var sp = txBody.Parent as Shape;
                            if (sp == null)
                            {
                                continue;
                            }

                            var cNvPr = sp.NonVisualShapeProperties?.NonVisualDrawingProperties;

                            // Shape information
                            var ShapeId = cNvPr?.Id?.Value;
                            var ShapeName = cNvPr?.Name?.Value;

                            // Option 1: all text of the body as one string.
                            var plainText = txBody.InnerText;
                            // Option 2: concatenate the individual text runs.
                            var runText = string.Join(null, txBody.Descendants<A.Text>().Select(o => o.Text));
                            // Option 3: keep paragraph boundaries as line breaks.
                            var text = string.Join(Environment.NewLine, txBody.Descendants<A.Paragraph>().Select(o => o.InnerText));
                        }
                    }
                }
            }

            /// <summary>
            /// Resolves the picture shape targeted by one audio/video timing node, then locates the
            /// media file (external link or embedded data part) and the poster image of that shape.
            /// </summary>
            /// <param name="slidePart">Slide part that owns the relationships.</param>
            /// <param name="slide">Slide whose shapes are searched for the targeted shape id.</param>
            /// <param name="mediaNode">Common media node of the audio/video element; may be null.</param>
            private static void ExtractMedia(SlidePart slidePart, Slide slide, CommonMediaNode mediaNode)
            {
                // Shape the media is attached to
                var spTgt = mediaNode?.TargetElement?.ShapeTarget;
                if (spTgt?.ShapeId == null)
                {
                    return;
                }

                // Non-visual drawing properties of that shape
                var cNvPr = slide.Descendants<NonVisualDrawingProperties>()
                    .FirstOrDefault(o => o.Id != null && o.Id == spTgt.ShapeId);
                if (cNvPr == null)
                {
                    return;
                }

                // Shape information
                var ShapeId = cNvPr.Id?.Value;
                var ShapeName = cNvPr.Name?.Value;
                var ShapeDescr = cNvPr.Description?.Value;

                // Parent and grandparent; only picture shapes are handled here.
                var nvPicPr = cNvPr.Parent as NonVisualPictureProperties;
                var pic = nvPicPr?.Parent as Picture;
                if (pic == null)
                {
                    return;
                }

                // Relationship id of the media file: audio first, then video.
                var nvPr = nvPicPr.ApplicationNonVisualDrawingProperties;
                string linkId = nvPr?.Elements<A.AudioFromFile>().FirstOrDefault()?.Link?.Value
                    ?? nvPr?.Elements<A.VideoFromFile>().FirstOrDefault()?.Link?.Value;

                if (!string.IsNullOrEmpty(linkId))
                {
                    // External relationship: the media lives outside the package.
                    var uri = slidePart.ExternalRelationships.FirstOrDefault(o => o.Id == linkId)?.Uri;
                    if (uri == null)
                    {
                        // Internal relationship: the media is stored as a data part of the package.
                        var dataPartReferenceRelationship = slidePart.DataPartReferenceRelationships.FirstOrDefault(o => o.Id == linkId);
                        if (dataPartReferenceRelationship != null)
                        {
                            uri = dataPartReferenceRelationship.Uri;
                            using (Stream mediaStream = dataPartReferenceRelationship.DataPart.GetStream())
                            {
                                // Consume the media stream here (e.g. copy it to a file).
                            }
                        }
                    }
                }

                // Poster image shown for the media shape
                var embed = pic.BlipFill?.Blip?.Embed?.Value;
                if (!string.IsNullOrEmpty(embed))
                {
                    var part = slidePart.GetPartById(embed);
                    using (Stream imgStream = part.GetStream())
                    {
                        // Consume the poster image stream here.
                    }
                }
            }
    

      

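    ppt文档的形状结构大概为(原文此处为图片,以下为文字示意):

        p:sld
          p:cSld
            p:spTree
              p:sp                      (形状)
                p:nvSpPr / p:cNvPr      (id、name)
                p:txBody                (文本)
              p:pic                     (图片或音视频)
                p:nvPicPr
                  p:cNvPr               (id、name、descr)
                  p:nvPr                (a:audioFile / a:videoFile,r:link)
                p:blipFill / a:blip     (r:embed)
          p:timing
            … p:audio 或 p:video / p:cMediaNode / p:tgtEl / p:spTgt (spid)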

    完毕

  • 相关阅读:
    【leetcode】Binary Search Tree Iterator
    【leetcode】Palindrome Partitioning II
    【leetcode】Best Time to Buy and Sell Stock III
    【leetcode】Best Time to Buy and Sell Stock II
    【leetcode】Longest Consecutive Sequence
    【leetcode】Factorial Trailing Zeroes
    【leetcode】Simplify Path
    【leetcode】Generate Parentheses
    【leetcode】Combination Sum II
    【leetcode】Combination Sum
  • 原文地址:https://www.cnblogs.com/ping9719/p/13497923.html
Copyright © 2011-2022 走看看