zoukankan      html  css  js  c++  java
  • office文档转Txt文档

    代码从网络上得到,稍作修改,使用的是从VS2008下取得的dll文件。Office 2007还支持“另存为”PDF,下面的代码运用的就是这种“另存为”逻辑:

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using Microsoft.Office.Core;
    using Microsoft.Office.Interop.Word;
    using Microsoft.Office.Interop.Excel;
    using Microsoft.Office.Interop.PowerPoint;
    using System.IO;


    namespace ConsoleApplication1
    {
        /// <summary>
        /// Contract for a converter that reads one input file and writes a converted
        /// copy to a separate output path. The implementations in this file all
        /// produce a text file from an Office document.
        /// </summary>
        public interface FileConvert
        {
            /// <summary>
            /// Converts the file at <paramref name="inputfilepath"/> and writes the
            /// result to <paramref name="outputfilepath"/>.
            /// </summary>
            /// <param name="inputfilepath">Path of the source document to read.</param>
            /// <param name="outputfilepath">Path the converted file is written to.</param>
            void Convert(string inputfilepath, string outputfilepath);
        }
        public class FileConvertFactory
        {
            private FileConvertFactory() { }
            public static FileConvert CreateFileConvert(string inputfilepath)
            {
                FileConvert fileConvert;
                string extension = Path.GetExtension(inputfilepath);
                switch (extension.ToLower())
                {
                    case (".xls"):
                    case (".xlsx"):
                        fileConvert = new ExcelFileConvert();
                        break;
                    case (".ppt"):
                    case (".pptx"):
                        fileConvert = new PPTFileConvert();
                        break;
                    case (".doc"):
                    case (".docx"):
                    default:
                        fileConvert = new DocFileConvert();
                        break;
                }
                return fileConvert;
            }
        }
        public class DocFileConvert : FileConvert
        {
            public void Convert(string inputfilepath, string outputfilepath)
            {
                Microsoft.Office.Interop.Word.ApplicationClass wordApp = new Microsoft.Office.Interop.Word.ApplicationClass();
                object fileName = inputfilepath;
                object missing = Type.Missing;
                //打开文档
                _Document doc = wordApp.Documents.Open(
                    ref fileName, ref missing, ref missing, ref missing,
                    ref missing, ref missing, ref missing, ref missing,
                    ref missing, ref missing, ref missing, ref missing,
                    ref missing, ref missing, ref missing, ref missing);
                object saveFormat = Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatTextLineBreaks;
                //object saveEncoding = MsoEncoding.msoEncodingUTF8;
                fileName = outputfilepath;
                //保存为TXT文档
                doc.SaveAs(
                    ref fileName, ref saveFormat, ref missing, ref missing,
                    ref missing, ref missing, ref missing, ref missing,
                    ref missing, ref missing, ref missing, ref missing,
                    ref missing, ref missing, ref missing, ref missing);
                doc.Close(ref missing, ref missing, ref missing);
                wordApp.Quit(ref missing, ref missing, ref missing);
            }
        }

        public class ExcelFileConvert : FileConvert
        {
            public void Convert(string inputfilepath, string outputfilepath)
            {
                Microsoft.Office.Interop.Excel.ApplicationClass wordApp = new Microsoft.Office.Interop.Excel.ApplicationClass();
                object fileName = inputfilepath;
                object missing = Type.Missing;
                Workbook doc = wordApp.Workbooks.Open(
                    inputfilepath, missing, missing, missing,
                     missing, missing, missing, missing,
                     missing, missing, missing, missing,
                     missing, missing, missing);
                object saveFormat = Microsoft.Office.Interop.Excel.XlFileFormat.xlUnicodeText;
                object saveResolution = XlSaveConflictResolution.xlLocalSessionChanges;
                doc.SaveAs(
                     outputfilepath, saveFormat, missing, missing,
                     missing, missing, XlSaveAsAccessMode.xlNoChange, saveResolution,
                     missing, missing, missing, missing);
                doc.Close(missing,missing,missing);
                wordApp.Quit();
            }
        }
        public class PPTFileConvert : FileConvert
        {
            public void Convert(string inputfilepath, string outputfilepath)
            {
                Microsoft.Office.Interop.PowerPoint.ApplicationClass wordApp = new Microsoft.Office.Interop.PowerPoint.ApplicationClass();
                object fileName = inputfilepath;
                object missing = Type.Missing;
                Presentation doc = wordApp.Presentations.Open(
                    inputfilepath, MsoTriState.msoCTrue, MsoTriState.msoCTrue, MsoTriState.msoFalse);
                object saveFormat = PpSaveAsFileType.ppSaveAsRTF;
                object saveResolution = XlSaveConflictResolution.xlLocalSessionChanges;
                string tempfile = Path.GetDirectoryName(outputfilepath) + DateTime.Now.ToFileTime() + "tmp.rtf";
                //保存为RTF文件
                doc.SaveAs(tempfile, PpSaveAsFileType.ppSaveAsRTF, MsoTriState.msoFalse);
                wordApp.Quit();
                //然后转换为txt文件
                DocFileConvert newConvert = new DocFileConvert();
                newConvert.Convert(tempfile, outputfilepath);
                File.Delete(tempfile);
            }
        }
    }

  • 相关阅读:
    Linux企业级项目实践之网络爬虫(6)——将程序设计成为守护进程
    Linux企业级项目实践之网络爬虫(5)——处理配置文件
    Linux企业级项目实践之网络爬虫(3)——设计自己的网络爬虫
    Linux企业级项目实践之网络爬虫(4)——主程序流程
    Linux企业级项目实践之网络爬虫(1)——项目概述及准备工作
    Linux企业级项目实践之网络爬虫(2)——网络爬虫的结构与工作流程
    泛化、依赖、关联、聚合、组合
    日常(停课后的月考)
    日常(停课后的月考)
    打击罪犯
  • 原文地址:https://www.cnblogs.com/qfcndtt/p/2495870.html
Copyright © 2011-2022 走看看