zoukankan      html  css  js  c++  java
  • 使用C#程序处理PowerPoint文件中的字符串

    最近, 有同事偶然发现 Microsoft Office PowerPoint 文件(.pptx)可以被看作是一个压缩包, 用 WinRAR 解压后会得到一组 XML 文件。解压出来的文件包括:

    一个索引文件名称为:[Content_Types].xml,

    一个名为ppt的文件夹,在其内有两个重要的子文件夹:slides 和notesSlides

    其中, [Content_Types].xml记录了每一张Slide的相对路径,每一个Slide note的相对路径。其内容如下图:

    我们发现 PPT 中的所有文本内容都记录在 XML 的 <a:t></a:t> 节点中。因此, 我们先把所有 a:t 节点的内容导出并进行修改, 再把修改后的内容替换回原文件, 最后将这一组文件重新压缩, 即可生成修改后的 PowerPoint 文件。该过程为 PowerPoint 的内容本地化提供了便捷途径。

    这种做法相比较于调用Microsoft.Office.Interop.PowerPoint中的API的做法来说, 保留了原文的100%的格式,不需要后期PPT刷格式的操作。

    以下是我们写的 C# 代码, 思路是将每张 Slide 的字符串导出到一个 txt 文件, 通过 Trados 翻译 txt 文件中的字符串, 然后将修改后的内容导入到 PPT 包内相应的 XML 文件中。

    PPTZIPCommon

    /// <summary>
    /// Helpers shared by the export and import steps that work on an unpacked
    /// PowerPoint package folder.
    /// </summary>
    class PPTZIPCommon
        {
            private const string SlideContentType = "application/vnd.openxmlformats-officedocument.presentationml.slide+xml";
            private const string NotesContentType = "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml";

            // Both separator styles are accepted so that a folder path is parsed the same way
            // regardless of how the caller wrote it.
            private static readonly char[] PathSeparators = new char[] { '\\', '/' };

            /// <summary>
            /// Reads [Content_Types].xml in <paramref name="root"/> and appends the full path of
            /// every slide part and every notes-slide part to the given lists.
            /// Shows a message box and returns without touching the lists when the file is missing.
            /// </summary>
            /// <param name="root">Folder that contains [Content_Types].xml.</param>
            /// <param name="SlideFiles">Receives the slide file paths; created when null.</param>
            /// <param name="NotesFiles">Receives the notes-slide file paths; created when null.</param>
            internal static void ReadContentTypes(string root, ref List<string> SlideFiles, ref List<string> NotesFiles)
            {
                string ct_fullName = Path.Combine(root, "[Content_Types].xml");

                if (!File.Exists(ct_fullName))
                {
                    MessageBox.Show(string.Format("the [Content_Types].xml not exist in {0}", root));
                    return;
                }

                if (SlideFiles == null)
                {
                    SlideFiles = new List<string>();
                }
                if (NotesFiles == null)
                {
                    NotesFiles = new List<string>();
                }

                XmlDocument xml_doc = new XmlDocument();
                xml_doc.Load(ct_fullName);

                foreach (XmlNode child in xml_doc.DocumentElement.ChildNodes)
                {
                    // Skip comments and other non-element nodes instead of failing on the cast.
                    XmlElement node = child as XmlElement;
                    if (node == null)
                    {
                        continue;
                    }

                    // GetAttribute returns an empty string when the attribute is absent,
                    // so elements without a PartName are simply ignored.
                    string partName = node.GetAttribute("PartName");
                    if (partName.Length == 0)
                    {
                        continue;
                    }

                    List<string> target;
                    string contentType = node.GetAttribute("ContentType");
                    if (contentType == SlideContentType)
                    {
                        target = SlideFiles;
                    }
                    else if (contentType == NotesContentType)
                    {
                        target = NotesFiles;
                    }
                    else
                    {
                        continue;
                    }

                    // Part names look like "/ppt/slides/slide1.xml": drop the leading slash and
                    // convert the remaining slashes to the platform separator so the directory
                    // structure is kept when the name is combined with the root folder.
                    string relatedPath = partName.TrimStart('/').Replace('/', Path.DirectorySeparatorChar);
                    target.Add(Path.Combine(root, relatedPath));
                }
            }

            /// <summary>
            /// Returns the last segment of <paramref name="scanFolder"/> up to its first dot,
            /// ignoring trailing path separators.
            /// </summary>
            /// <param name="scanFolder">Path of the unpacked presentation folder.</param>
            /// <returns>
            /// The derived name, or an empty string when the path is null, empty,
            /// or its last segment has no usable characters.
            /// </returns>
            internal static string GetPPTNameFromFullPath(string scanFolder)
            {
                if (string.IsNullOrEmpty(scanFolder))
                {
                    return string.Empty;
                }

                string trimmed = scanFolder.TrimEnd(PathSeparators);

                // LastIndexOfAny returns -1 when there is no separator, so +1 yields the whole string.
                string lastString = trimmed.Substring(trimmed.LastIndexOfAny(PathSeparators) + 1);

                string[] names = lastString.Split(new char[] { '.' }, StringSplitOptions.RemoveEmptyEntries);
                return names.Length > 0 ? names[0] : string.Empty;
            }
        }

    PPTZIP

    /// <summary>
    /// Exports the text runs of every slide and notes slide of an unpacked
    /// PowerPoint package into plain-text files for translation.
    /// </summary>
    class PPTZIP
        {
            private static List<string> SlideFiles = new List<string>();
            private static List<string> NotesFiles = new List<string>();

            // Captures the text between <a:t> and </a:t>. One or more characters are accepted,
            // so single-character runs are exported as well.
            private static readonly Regex TextRunPattern = new Regex(@"<a:t>([^<>]+)</a:t>");

            /// <summary>
            /// Collects the text of all &lt;a:t&gt;...&lt;/a:t&gt; runs found in <paramref name="file"/>
            /// and writes them, one per line and each terminated by '^', to
            /// &lt;output&gt;/&lt;pptName&gt;_&lt;file name&gt;.txt.
            /// </summary>
            /// <param name="file">XML file that contains the text runs.</param>
            /// <param name="output">Existing folder that receives the txt file.</param>
            /// <param name="pptName">Original PowerPoint file name, used as file-name prefix.</param>
            private static void ReadATContent2TXT(string file, string output, string pptName)
            {
                string content = File.ReadAllText(file);

                StringBuilder sb = new StringBuilder();
                foreach (Match run in TextRunPattern.Matches(content))
                {
                    sb.Append(run.Groups[1].Value).AppendLine("^");
                }

                string txtFile = Path.Combine(output, pptName + "_" + Path.GetFileName(file) + ".txt");

                // Only the trailing line break is removed: leading whitespace belongs to the
                // first text run and must survive so the run can be matched again on import.
                File.WriteAllText(txtFile, sb.ToString().TrimEnd());
            }

            /// <summary>
            /// Exports the text of every slide into "SlideTXTs" and of every notes slide into
            /// "NotesTXTs" below <paramref name="scanFolder"/>, and creates the matching empty
            /// "*_Trans" folders next to them.
            /// </summary>
            /// <param name="scanFolder">Folder of the unpacked presentation.</param>
            public static void Export2TXTs(string scanFolder)
            {
                string ppt_name = PPTZIPCommon.GetPPTNameFromFullPath(scanFolder);

                // The lists are static; clear them so a second export in the same process
                // does not re-export the files collected by an earlier call.
                SlideFiles.Clear();
                NotesFiles.Clear();
                PPTZIPCommon.ReadContentTypes(scanFolder, ref SlideFiles, ref NotesFiles);

                ExportGroup(SlideFiles, scanFolder, "SlideTXTs", "SlideTXTs_Trans", ppt_name);
                ExportGroup(NotesFiles, scanFolder, "NotesTXTs", "NotesTXTs_Trans", ppt_name);
            }

            /// <summary>
            /// Exports one group of XML files; does nothing (and creates no folders) when the group is empty.
            /// </summary>
            private static void ExportGroup(List<string> files, string scanFolder, string outputName, string transName, string pptName)
            {
                if (files == null || files.Count == 0)
                {
                    return;
                }

                // CreateDirectory is a no-op for folders that already exist.
                string outputFolder = Path.Combine(scanFolder, outputName);
                Directory.CreateDirectory(outputFolder);
                Directory.CreateDirectory(Path.Combine(scanFolder, transName));

                foreach (string file in files)
                {
                    ReadATContent2TXT(file, outputFolder, pptName);
                }
            }
        }

    PPTZIPWriter

    /// <summary>
    /// Writes translated text back into the slide and notes-slide XML files of an
    /// unpacked PowerPoint package.
    /// </summary>
    class PPTZIPWriter
        {
            private static List<string> SlideFiles = new List<string>();
            private static List<string> NotesFiles = new List<string>();

            /// <summary>
            /// Replaces the text of every &lt;a:t&gt; run in <paramref name="file"/> whose content
            /// equals an entry of <paramref name="original"/> with the entry at the same index
            /// of <paramref name="translated"/>, then rewrites the file.
            /// </summary>
            /// <param name="file">XML file to update in place.</param>
            /// <param name="original">Source strings; the caller guarantees the same count as <paramref name="translated"/>.</param>
            /// <param name="translated">Replacement strings.</param>
            private static void Replace(string file, List<string> original, List<string> translated)
            {
                // When a source string occurs more than once, its first translation is used for all runs.
                Dictionary<string, string> translations = new Dictionary<string, string>();
                for (int i = 0; i < original.Count; i++)
                {
                    if (!translations.ContainsKey(original[i]))
                    {
                        translations.Add(original[i], translated[i]);
                    }
                }

                string content = File.ReadAllText(file);

                // A single pass over the document: text that was just inserted is never
                // inspected again, so a translation that happens to equal another source
                // string is not translated a second time.
                content = System.Text.RegularExpressions.Regex.Replace(
                    content,
                    @"<a:t>([^<>]*)</a:t>",
                    m =>
                    {
                        string replacement;
                        return translations.TryGetValue(m.Groups[1].Value, out replacement)
                            ? string.Format("<a:t>{0}</a:t>", replacement)
                            : m.Value;
                    });

                File.WriteAllText(file, content);
            }

            /// <summary>
            /// Imports the translated strings of all slides and notes slides below
            /// <paramref name="scanFolder"/>.
            /// </summary>
            /// <param name="scanFolder">Folder of the unpacked presentation.</param>
            /// <param name="lan">Language suffix of the translated txt files; null or empty when they carry no suffix.</param>
            public static void Import2PPT(string scanFolder, string lan)
            {
                string ppt_name = PPTZIPCommon.GetPPTNameFromFullPath(scanFolder);

                // The lists are static; clear them so repeated imports do not process
                // files collected by an earlier call.
                SlideFiles.Clear();
                NotesFiles.Clear();
                PPTZIPCommon.ReadContentTypes(scanFolder, ref SlideFiles, ref NotesFiles);

                string slideSrc = Path.Combine(scanFolder, "SlideTXTs");
                string slideTrg = Path.Combine(scanFolder, "SlideTXTs_Trans");
                foreach (string file in SlideFiles)
                {
                    ReplaceATContent(file, slideSrc, slideTrg, ppt_name, lan);
                }

                string notesSrc = Path.Combine(scanFolder, "NotesTXTs");
                string notesTrg = Path.Combine(scanFolder, "NotesTXTs_Trans");
                foreach (string file in NotesFiles)
                {
                    ReplaceATContent(file, notesSrc, notesTrg, ppt_name, lan);
                }
            }

            /// <summary>
            /// Loads the exported and the translated txt file that belong to <paramref name="file"/>
            /// and applies the translation. Shows a message box and leaves the XML file untouched
            /// when a folder or file is missing or the entry counts differ.
            /// </summary>
            private static void ReplaceATContent(string file, string srcFolder, string trgFolder, string pptName, string lan)
            {
                if (!(Directory.Exists(srcFolder) && Directory.Exists(trgFolder)))
                {
                    MessageBox.Show("SlideTXTs/NotesTXTs or SlideTXTs_Trans/NotesTXTs_Trans not exist");
                    return;
                }

                string xmlName = Path.GetFileName(file);
                string srcFileFullPath = Path.Combine(srcFolder, string.Format("{0}_{1}.txt", pptName, xmlName));

                // A null language is treated like an empty one, so no dangling "_" suffix is produced.
                string trgFileName = string.IsNullOrEmpty(lan)
                    ? string.Format("{0}_{1}.txt", pptName, xmlName)
                    : string.Format("{0}_{1}_{2}.txt", pptName, xmlName, lan);
                string trgFileFullPath = Path.Combine(trgFolder, trgFileName);

                if (!(File.Exists(srcFileFullPath) && File.Exists(trgFileFullPath)))
                {
                    MessageBox.Show(string.Format(@"File {0} not replaced", file));
                    return;
                }

                List<string> originalString = ReadEntries(srcFileFullPath);
                List<string> translatedString = ReadEntries(trgFileFullPath);

                if (originalString.Count != translatedString.Count)
                {
                    MessageBox.Show(string.Format(@"translation string count not match:{0}", file));
                    return;
                }

                Replace(file, originalString, translatedString);
            }

            /// <summary>
            /// Reads a txt file of '^'-terminated entries and returns the entries in file order.
            /// NOTE(review): an entry whose text itself contains '^' is split in two; the export
            /// format offers no escaping for that character.
            /// </summary>
            private static List<string> ReadEntries(string path)
            {
                List<string> entries = new List<string>();

                // Trailing whitespace is dropped so a final line break does not become an entry.
                string content = File.ReadAllText(path).TrimEnd();
                string[] parts = content.Split(new string[] { "^" }, StringSplitOptions.RemoveEmptyEntries);
                foreach (string part in parts)
                {
                    entries.Add(StripLeadingLineBreak(part));
                }

                return entries;
            }

            /// <summary>
            /// Removes the single line break that separates an entry from the previous one.
            /// Only a leading break is removed; the rest of the entry is left untouched.
            /// </summary>
            private static string StripLeadingLineBreak(string entry)
            {
                if (entry.StartsWith("\r\n", StringComparison.Ordinal))
                {
                    return entry.Substring(2);
                }
                if (entry.StartsWith("\n", StringComparison.Ordinal))
                {
                    return entry.Substring(1);
                }
                return entry;
            }
        }
  • 相关阅读:
    zepto源码解读(二)——zpeto.init()——(4)$.each()函数 / likeArray函数
    zepto源码解读(二)——zpeto.init()——(3)isFunction函数
    JS小问题之—— 关于SetCapture() 和 ReleaseCapture()的用法
    zepto源码解读(二)——zpeto.init()函数——(2)zepto.fragment()函数详解
    zepto源码解读(二)——zpeto.init()函数——(1)结构整理
    JS小问题之——缓冲运动
    zepto源码解读(一)——整体架构
    jquery源码猜想(一)
    去除小括号和小括号里面的内容
    查看端口使用情况
  • 原文地址:https://www.cnblogs.com/qixue/p/4497691.html
Copyright © 2011-2022 走看看