最近,有同事偶然发现 Microsoft Office PowerPoint 文件(.pptx)本质上是一个 ZIP 压缩包,可以用 WinRAR 解压出一组 XML 文件。解压出来的文件包括:
一个索引文件名称为:[Content_Types].xml,
一个名为ppt的文件夹,在其内有两个重要的子文件夹:slides 和notesSlides
其中, [Content_Types].xml记录了每一张Slide的相对路径,每一个Slide note的相对路径。其内容如下图:
我们发现PPT中的所有文本内容都记录在XML的<a:t></a:t>节点中。因此,我们先把所有a:t节点的内容导出,对内容进行修改,再把修改后的内容替换回原文件,最后将这一组文件重新压缩,即可生成修改后的PowerPoint文件。该过程为PowerPoint的内容本地化提供了便捷途径。
这种做法相比较于调用Microsoft.Office.Interop.PowerPoint中的API的做法来说, 保留了原文的100%的格式,不需要后期PPT刷格式的操作。
以下是我们写的C#代码,思路是:将每张Slide中的字符串导出到一个txt文件,用Trados翻译txt文件中的字符串,然后将翻译后的内容导入到PPT包内相应的XML文件中。
PPTZIPCommon
/// <summary>
/// Helpers shared by the exporter and the importer for working with the folder
/// obtained by unpacking a PowerPoint package.
/// </summary>
class PPTZIPCommon
{
    // Content types used in [Content_Types].xml to mark slide parts and slide-notes parts.
    private const string SlideContentType = "application/vnd.openxmlformats-officedocument.presentationml.slide+xml";
    private const string NotesContentType = "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml";

    // Both separators are accepted so that folder paths are parsed the same way
    // regardless of which one the caller used.
    private static readonly char[] PathSeparators = { '\\', '/' };

    /// <summary>
    /// Reads [Content_Types].xml from <paramref name="root"/> and appends the full path of
    /// every slide part and every slide-notes part to the supplied lists.
    /// Shows a message box and returns without touching the lists when the file is missing.
    /// </summary>
    /// <param name="root">Folder that contains the unpacked package.</param>
    /// <param name="SlideFiles">Receives the slide file paths (created when null).</param>
    /// <param name="NotesFiles">Receives the slide-notes file paths (created when null).</param>
    internal static void ReadContentTypes(string root, ref List<string> SlideFiles, ref List<string> NotesFiles)
    {
        string ct_file = @"[Content_Types].xml";
        string ct_fullName = Path.Combine(root, ct_file);
        if (!File.Exists(ct_fullName))
        {
            MessageBox.Show(string.Format("the [Content_Types].xml not exist in {0}", root));
            return;
        }

        if (SlideFiles == null)
        {
            SlideFiles = new List<string>();
        }
        if (NotesFiles == null)
        {
            NotesFiles = new List<string>();
        }

        XmlDocument xml_doc = new XmlDocument();
        xml_doc.Load(ct_fullName);
        XmlElement rootElement = xml_doc.DocumentElement;
        if (rootElement == null)
        {
            return;
        }

        foreach (XmlNode child in rootElement.ChildNodes)
        {
            // Comments and whitespace are child nodes too; only elements carry attributes.
            XmlElement node = child as XmlElement;
            if (node == null)
            {
                continue;
            }

            // GetAttribute returns an empty string for a missing attribute, so elements
            // without ContentType/PartName are skipped instead of raising an exception.
            string contentType = node.GetAttribute("ContentType");
            string partName = node.GetAttribute("PartName");
            if (partName.Length == 0)
            {
                continue;
            }

            List<string> target;
            if (contentType == SlideContentType)
            {
                target = SlideFiles;
            }
            else if (contentType == NotesContentType)
            {
                target = NotesFiles;
            }
            else
            {
                continue;
            }

            // Part names look like "/ppt/slides/slide1.xml": drop the leading slash and
            // switch to the platform separator so the result is a usable relative path.
            string relatedPath = partName.TrimStart('/').Replace('/', Path.DirectorySeparatorChar);
            target.Add(Path.Combine(root, relatedPath));
        }
    }

    /// <summary>
    /// Returns the presentation name derived from the unpacked folder path: the last path
    /// segment, truncated at its first dot (e.g. "C:\work\Demo.pptx\" gives "Demo").
    /// </summary>
    /// <param name="scanFolder">Path of the unpacked folder; trailing separators are ignored.</param>
    /// <returns>The name, or an empty string when the path has no usable last segment.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="scanFolder"/> is null.</exception>
    internal static string GetPPTNameFromFullPath(string scanFolder)
    {
        if (scanFolder == null)
        {
            throw new ArgumentNullException("scanFolder");
        }

        string trimmed = scanFolder.TrimEnd(PathSeparators);
        // LastIndexOfAny returns -1 when there is no separator, which makes the
        // Substring start at 0 and keep the whole string.
        string lastString = trimmed.Substring(trimmed.LastIndexOfAny(PathSeparators) + 1);
        string[] names = lastString.Split(new char[] { '.' }, StringSplitOptions.RemoveEmptyEntries);
        return names.Length > 0 ? names[0] : string.Empty;
    }
}
PPTZIP
/// <summary>
/// Exports the text runs of every slide and slide-notes part of an unpacked
/// PowerPoint package into plain-text files, one file per part.
/// </summary>
class PPTZIP
{
    private static List<string> SlideFiles = new List<string>();
    private static List<string> NotesFiles = new List<string>();

    // One text run: <a:t>text</a:t> with at least one character and no nested markup.
    // The captured group is the text itself.
    private static readonly Regex TextRunPattern = new Regex(@"<a:t>([^<>]+)</a:t>");

    /// <summary>
    /// Collects all &lt;a:t&gt;...&lt;/a:t&gt; strings of one XML part and writes them to
    /// output\&lt;original PPT name&gt;_&lt;fileName&gt;.txt, one string per line, each
    /// terminated by '^'. Runs that consist only of whitespace are not exported.
    /// </summary>
    /// <param name="file">XML file that contains the &lt;a:t&gt;...&lt;/a:t&gt; runs.</param>
    /// <param name="output">Folder that receives the txt file.</param>
    /// <param name="pptName">Original PowerPoint file name.</param>
    private static void ReadATContent2TXT(string file, string output, string pptName)
    {
        string content;
        using (StreamReader reader = new StreamReader(file))
        {
            content = reader.ReadToEnd();
        }

        StringBuilder sb = new StringBuilder();
        foreach (Match match in TextRunPattern.Matches(content))
        {
            string text = match.Groups[1].Value;
            // Whitespace-only runs carry nothing to translate.
            if (text.Trim().Length == 0)
            {
                continue;
            }
            sb.Append(text).AppendLine("^");
        }

        string txtFile = Path.Combine(output, pptName + "_" + Path.GetFileName(file) + ".txt");
        using (StreamWriter writer = new StreamWriter(txtFile))
        {
            // Only the final line break is dropped; leading whitespace of the first
            // run is part of the text and must survive the round trip.
            writer.Write(sb.ToString().TrimEnd('\r', '\n'));
        }
    }

    /// <summary>
    /// Exports one group of parts into <paramref name="txtFolderName"/> and makes sure the
    /// matching translation folder exists. Does nothing when the group is empty.
    /// </summary>
    private static void ExportGroup(List<string> files, string scanFolder, string txtFolderName, string transFolderName, string pptName)
    {
        if (files == null || files.Count == 0)
        {
            return;
        }

        // CreateDirectory is a no-op for folders that already exist.
        string outputfolder = Path.Combine(scanFolder, txtFolderName);
        Directory.CreateDirectory(outputfolder);
        Directory.CreateDirectory(Path.Combine(scanFolder, transFolderName));

        foreach (string file in files)
        {
            ReadATContent2TXT(file, outputfolder, pptName);
        }
    }

    /// <summary>
    /// Exports the text of all slides to SlideTXTs and of all slide notes to NotesTXTs
    /// under <paramref name="scanFolder"/>, and creates the empty SlideTXTs_Trans and
    /// NotesTXTs_Trans folders that are meant to receive the translated files.
    /// </summary>
    /// <param name="scanFolder">Folder that contains the unpacked package.</param>
    public static void Export2TXTs(string scanFolder)
    {
        string ppt_name = PPTZIPCommon.GetPPTNameFromFullPath(scanFolder);

        // The lists are static: start from empty ones so that a second export in the
        // same process does not process the files of the previous run again.
        SlideFiles = new List<string>();
        NotesFiles = new List<string>();
        PPTZIPCommon.ReadContentTypes(scanFolder, ref SlideFiles, ref NotesFiles);

        ExportGroup(SlideFiles, scanFolder, "SlideTXTs", "SlideTXTs_Trans", ppt_name);
        ExportGroup(NotesFiles, scanFolder, "NotesTXTs", "NotesTXTs_Trans", ppt_name);
    }
}
PPTZIPWriter
/// <summary>
/// Writes translated text back into the slide and slide-notes parts of an unpacked
/// PowerPoint package, using the exported source txt files and their translated copies.
/// </summary>
class PPTZIPWriter
{
    private static List<string> SlideFiles = new List<string>();
    private static List<string> NotesFiles = new List<string>();

    // One text run: <a:t>text</a:t>; the captured group is the text.
    // Fully qualified so the class does not depend on a using directive for Regex.
    private static readonly System.Text.RegularExpressions.Regex TextRunPattern =
        new System.Text.RegularExpressions.Regex(@"<a:t>([^<>]*)</a:t>");

    private static readonly char[] NewLineChars = { '\r', '\n' };

    /// <summary>
    /// Replaces the text runs of <paramref name="file"/> in a single pass. Each run whose
    /// text equals an entry of <paramref name="original"/> receives the translation at the
    /// same index; repeated source strings consume their translations in order. Runs that
    /// have no entry are left untouched.
    /// </summary>
    /// <param name="file">XML part to rewrite in place.</param>
    /// <param name="original">Source strings, in export order.</param>
    /// <param name="translated">Translations; same length and order as <paramref name="original"/>.</param>
    private static void Replace(string file, List<string> original, List<string> translated)
    {
        Dictionary<string, Queue<string>> pending = new Dictionary<string, Queue<string>>();
        for (int i = 0; i < original.Count; i++)
        {
            Queue<string> queue;
            if (!pending.TryGetValue(original[i], out queue))
            {
                queue = new Queue<string>();
                pending.Add(original[i], queue);
            }
            queue.Enqueue(translated[i]);
        }

        string content;
        using (StreamReader reader = new StreamReader(file))
        {
            content = reader.ReadToEnd();
        }

        // A single pass over the document means inserted translations are never scanned
        // again, so a translation that happens to equal another source string is safe.
        string updated = TextRunPattern.Replace(content, delegate(System.Text.RegularExpressions.Match match)
        {
            Queue<string> queue;
            if (!pending.TryGetValue(match.Groups[1].Value, out queue))
            {
                return match.Value;
            }
            // Keep the last translation available so that further occurrences of the
            // same text are still replaced.
            string replacement = queue.Count > 1 ? queue.Dequeue() : queue.Peek();
            return "<a:t>" + replacement + "</a:t>";
        });

        using (StreamWriter writer = new StreamWriter(file))
        {
            writer.Write(updated);
        }
    }

    /// <summary>
    /// Reads a '^'-delimited txt file and returns its entries in order. The line break
    /// that separates one entry from the next is removed; all other characters,
    /// including spaces inside the text, are kept.
    /// </summary>
    private static List<string> ReadEntries(string path)
    {
        string content;
        using (StreamReader reader = new StreamReader(path))
        {
            // Whitespace after the final delimiter is not an entry.
            content = reader.ReadToEnd().TrimEnd();
        }

        List<string> entries = new List<string>();
        string[] parts = content.Split(new string[] { "^" }, StringSplitOptions.RemoveEmptyEntries);
        foreach (string part in parts)
        {
            entries.Add(part.TrimStart(NewLineChars));
        }
        return entries;
    }

    /// <summary>
    /// Imports the translations for every slide (SlideTXTs / SlideTXTs_Trans) and every
    /// slide-notes part (NotesTXTs / NotesTXTs_Trans) found under <paramref name="scanFolder"/>.
    /// </summary>
    /// <param name="scanFolder">Folder that contains the unpacked package.</param>
    /// <param name="lan">Language suffix of the translated file names; null or empty when they carry no suffix.</param>
    public static void Import2PPT(string scanFolder, string lan)
    {
        string ppt_name = PPTZIPCommon.GetPPTNameFromFullPath(scanFolder);

        // The lists are static: start from empty ones so that a second import in the
        // same process does not process the files of the previous run again.
        SlideFiles = new List<string>();
        NotesFiles = new List<string>();
        PPTZIPCommon.ReadContentTypes(scanFolder, ref SlideFiles, ref NotesFiles);

        string slideSource = Path.Combine(scanFolder, "SlideTXTs");
        string slideTranslated = Path.Combine(scanFolder, "SlideTXTs_Trans");
        foreach (string file in SlideFiles)
        {
            ReplaceATContent(file, slideSource, slideTranslated, ppt_name, lan);
        }

        string notesSource = Path.Combine(scanFolder, "NotesTXTs");
        string notesTranslated = Path.Combine(scanFolder, "NotesTXTs_Trans");
        foreach (string file in NotesFiles)
        {
            ReplaceATContent(file, notesSource, notesTranslated, ppt_name, lan);
        }
    }

    /// <summary>
    /// Locates the source and translated txt files that belong to <paramref name="file"/>
    /// and applies the translation. Shows a message box and leaves the part unchanged when
    /// a folder or file is missing or the two files do not hold the same number of entries.
    /// </summary>
    /// <param name="file">XML part to update.</param>
    /// <param name="srcFolder">Folder with the exported source txt files.</param>
    /// <param name="trgFolder">Folder with the translated txt files.</param>
    /// <param name="pptName">Original PowerPoint file name used as file-name prefix.</param>
    /// <param name="lan">Optional language suffix of the translated file name.</param>
    private static void ReplaceATContent(string file, string srcFolder, string trgFolder, string pptName, string lan)
    {
        if (!(Directory.Exists(srcFolder) && Directory.Exists(trgFolder)))
        {
            MessageBox.Show("SlideTXTs/NotesTXTs or SlideTXTs_Trans/NotesTXTs_Trans not exist");
            return;
        }

        string partFileName = Path.GetFileName(file);
        string srcFileName = string.Format("{0}_{1}.txt", pptName, partFileName);
        string srcFileFullPath = Path.Combine(srcFolder, srcFileName);

        // A null suffix is treated like an empty one.
        string trgFileName = string.IsNullOrEmpty(lan)
            ? string.Format("{0}_{1}.txt", pptName, partFileName)
            : string.Format("{0}_{1}_{2}.txt", pptName, partFileName, lan);
        string trgFileFullPath = Path.Combine(trgFolder, trgFileName);

        if (!(File.Exists(srcFileFullPath) && File.Exists(trgFileFullPath)))
        {
            MessageBox.Show(string.Format(@"File {0} not replaced", file));
            return;
        }

        List<string> originalString = ReadEntries(srcFileFullPath);
        List<string> translatedString = ReadEntries(trgFileFullPath);

        if (originalString.Count != translatedString.Count)
        {
            MessageBox.Show(string.Format(@"translation string count not match:{0}", file));
            return;
        }

        Replace(file, originalString, translatedString);
    }
}