zoukankan      html  css  js  c++  java
  • 10-利用com组件读取office

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.IO;
    using System.Text.RegularExpressions;
    using iTextSharp.text.pdf;
    using iTextSharp.text.pdf.parser;
    
    namespace _04查找文件中指定字符串的位置
    {
        /// <summary>
        /// Console sample that extracts plain text from Word, PDF, Excel and PowerPoint files.
        /// The Office formats are read through COM automation (Microsoft.Office.Interop.*),
        /// PDF files through the iTextSharp library.
        /// </summary>
        class Program
        {
            static void Main(string[] args)
            {
                // NOTE: the paths are rooted ("F:\name"). "F:name" without the separator would be
                // a drive-relative path that depends on the current directory of drive F.

                // =========== Word document ===========
                string docFileName = @"F:\人力资源行业用语.docx";
                string docText = Doc2Text(docFileName);

                // =========== PDF document ===========
                string pdfFileName = @"F:\2级1课备课.pdf";
                string pdfText = Pdf2Text(pdfFileName);

                // =========== Excel workbook ===========
                string xlsFileName = @"F:\学生信息导入表.xls";
                string xlsText = Xls2Text(xlsFileName);
            }

            #region Reading Office documents through COM components

            /// <summary>
            /// Verifies that the given file exists.
            /// </summary>
            /// <param name="pFileName">Path of the file to check.</param>
            /// <exception cref="ApplicationException">The file does not exist.</exception>
            private static void IsExists(string pFileName)
            {
                if (!File.Exists(pFileName))
                {
                    throw new ApplicationException("指定目录下的无该文件");
                }
            }

            /// <summary>
            /// Releases the given COM wrappers and then forces a garbage collection.
            /// </summary>
            /// <param name="comObjects">Wrappers to release; null entries are ignored.</param>
            private static void ReleaseComObjects(params object[] comObjects)
            {
                foreach (object comObject in comObjects)
                {
                    if (comObject != null && System.Runtime.InteropServices.Marshal.IsComObject(comObject))
                    {
                        System.Runtime.InteropServices.Marshal.ReleaseComObject(comObject);
                    }
                }

                // Wrappers created implicitly by property chains (Documents, Worksheets,
                // UsedRange, ...) are never held in a local, so they can only be released
                // by the finalizer; the original code relied on GC.Collect() for this too.
                GC.Collect();
                GC.WaitForPendingFinalizers();
            }

            /// <summary>
            /// Returns the text content of a Word document.
            /// </summary>
            /// <param name="docFileName">Path of the Word document.</param>
            /// <returns>The value of the document's <c>Content.Text</c>.</returns>
            /// <exception cref="ApplicationException">The file does not exist.</exception>
            public static string Doc2Text(string docFileName)
            {
                IsExists(docFileName);

                // Word runs in its own process, so a relative path could be resolved against a
                // different working directory than the one File.Exists used; pass a full path.
                object fileobj = Path.GetFullPath(docFileName);
                object nullobj = System.Reflection.Missing.Value;
                // The document is only read; never let Word ask whether to save it.
                object doNotSave = Microsoft.Office.Interop.Word.WdSaveOptions.wdDoNotSaveChanges;

                Microsoft.Office.Interop.Word.Application wordApp = new Microsoft.Office.Interop.Word.Application();
                Microsoft.Office.Interop.Word.Document doc = null;
                try
                {
                    // Open the file (the number of parameters differs between interop versions;
                    // everything except the file name is left at its default).
                    doc = wordApp.Documents.Open(ref fileobj, ref nullobj, ref nullobj, ref nullobj,
                        ref nullobj, ref nullobj, ref nullobj, ref nullobj,
                        ref nullobj, ref nullobj, ref nullobj, ref nullobj,
                        ref nullobj, ref nullobj, ref nullobj, ref nullobj);
                    try
                    {
                        return doc.Content.Text;
                    }
                    finally
                    {
                        doc.Close(ref doNotSave, ref nullobj, ref nullobj);
                    }
                }
                finally
                {
                    // Always shut Word down, even when opening or reading failed; otherwise the
                    // Word process started above would be left running.
                    try
                    {
                        wordApp.Quit(ref doNotSave, ref nullobj, ref nullobj);
                    }
                    finally
                    {
                        ReleaseComObjects(doc, wordApp);
                    }
                }
            }

            /// <summary>
            /// Returns the text content of a PDF file. Requires the third-party iTextSharp library.
            /// </summary>
            /// <param name="pdfFileName">Path of the PDF file.</param>
            /// <returns>The extracted text of all pages, concatenated in page order.</returns>
            public static string Pdf2Text(string pdfFileName)
            {
                PdfReader reader = new PdfReader(pdfFileName);
                try
                {
                    StringBuilder data = new StringBuilder();
                    int pageCount = reader.NumberOfPages;
                    // Page numbers are 1-based.
                    for (int page = 1; page <= pageCount; page++)
                    {
                        data.Append(PdfTextExtractor.GetTextFromPage(reader, page));
                    }
                    return data.ToString();
                }
                finally
                {
                    // Release the underlying file even when extraction fails.
                    reader.Close();
                }
            }

            /// <summary>
            /// Returns the text content of an Excel workbook.
            /// </summary>
            /// <param name="xlsFileName">Path of the workbook.</param>
            /// <returns>
            /// Every non-empty cell value of every worksheet's used range, each followed by "|",
            /// in sheet, row, column order.
            /// </returns>
            /// <exception cref="ApplicationException">The file does not exist.</exception>
            public static string Xls2Text(string xlsFileName)
            {
                IsExists(xlsFileName);

                // Excel runs in its own process; hand it a full path (see Doc2Text).
                string fullPath = Path.GetFullPath(xlsFileName);
                object nullobj = System.Reflection.Missing.Value;
                StringBuilder builder = new StringBuilder();

                Microsoft.Office.Interop.Excel.Application xlsApp = new Microsoft.Office.Interop.Excel.Application();
                Microsoft.Office.Interop.Excel.Workbook excel = null;
                try
                {
                    excel = xlsApp.Workbooks.Open(fullPath, nullobj, nullobj, nullobj, nullobj,
                        nullobj, nullobj, nullobj, nullobj, nullobj,
                        nullobj, nullobj, nullobj, nullobj, nullobj);
                    try
                    {
                        int sheetCount = excel.Worksheets.Count;
                        // Worksheet indexes are 1-based.
                        for (int k = 1; k <= sheetCount; k++)
                        {
                            Microsoft.Office.Interop.Excel.Worksheet ews =
                                (Microsoft.Office.Interop.Excel.Worksheet)excel.Worksheets[k];

                            // Fetch the values once per sheet: every access to Value2 is a COM
                            // call that copies the whole used range.
                            AppendCellValues(builder, ews.UsedRange.Value2);
                        }
                    }
                    finally
                    {
                        // The workbook is only read; close without saving so no prompt can appear.
                        excel.Close(false, nullobj, nullobj);
                    }
                }
                finally
                {
                    // Always shut Excel down, even when opening or reading failed.
                    try
                    {
                        xlsApp.Quit();
                    }
                    finally
                    {
                        ReleaseComObjects(excel, xlsApp);
                    }
                }

                return builder.ToString();
            }

            /// <summary>
            /// Appends every non-null cell value of a range value to the builder, each followed by "|".
            /// </summary>
            /// <param name="builder">Receives the text.</param>
            /// <param name="rangeValue">
            /// A value read from <c>Range.Value2</c>: null, a two-dimensional array, or a single
            /// scalar when the range consists of exactly one cell.
            /// </param>
            private static void AppendCellValues(StringBuilder builder, object rangeValue)
            {
                if (rangeValue == null)
                {
                    return;
                }

                object[,] cells = rangeValue as object[,];
                if (cells == null)
                {
                    // Single-cell range: the value is not wrapped in an array.
                    builder.Append(rangeValue).Append("|");
                    return;
                }

                // Use the array's own bounds instead of assuming a particular base index.
                for (int i = cells.GetLowerBound(0); i <= cells.GetUpperBound(0); i++)
                {
                    for (int j = cells.GetLowerBound(1); j <= cells.GetUpperBound(1); j++)
                    {
                        if (cells[i, j] != null)
                        {
                            builder.Append(cells[i, j]).Append("|");
                        }
                    }
                }
            }

            /// <summary>
            /// Returns the text content of a PowerPoint presentation.
            /// </summary>
            /// <param name="pptFileName">Path of the presentation.</param>
            /// <returns>The text of every shape that contains text, concatenated in slide/shape order.</returns>
            /// <exception cref="ApplicationException">The file does not exist.</exception>
            public static string Ppt2Text(string pptFileName)
            {
                IsExists(pptFileName);

                // PowerPoint runs in its own process; hand it a full path (see Doc2Text).
                string fullPath = Path.GetFullPath(pptFileName);
                StringBuilder builder = new StringBuilder();

                Microsoft.Office.Interop.PowerPoint.Application pptApp = new Microsoft.Office.Interop.PowerPoint.Application();
                Microsoft.Office.Interop.PowerPoint.Presentation ppt = null;
                try
                {
                    // Arguments after the path: ReadOnly = true, Untitled = false, WithWindow = false.
                    ppt = pptApp.Presentations.Open(fullPath,
                        Microsoft.Office.Core.MsoTriState.msoTrue,
                        Microsoft.Office.Core.MsoTriState.msoFalse,
                        Microsoft.Office.Core.MsoTriState.msoFalse);
                    try
                    {
                        foreach (Microsoft.Office.Interop.PowerPoint.Slide slide in ppt.Slides)
                        {
                            foreach (Microsoft.Office.Interop.PowerPoint.Shape shape in slide.Shapes)
                            {
                                // Not every shape has a text frame; check before touching TextFrame.
                                if (shape.HasTextFrame == Microsoft.Office.Core.MsoTriState.msoTrue
                                    && shape.TextFrame.HasText == Microsoft.Office.Core.MsoTriState.msoTrue)
                                {
                                    builder.Append(shape.TextFrame.TextRange.Text);
                                }
                            }
                        }
                    }
                    finally
                    {
                        ppt.Close();
                    }
                }
                finally
                {
                    // Always shut PowerPoint down, even when opening or reading failed.
                    try
                    {
                        pptApp.Quit();
                    }
                    finally
                    {
                        ReleaseComObjects(ppt, pptApp);
                    }
                }

                return builder.ToString();
            }

            #endregion
        }
    }
  • 相关阅读:
    静态库与动态库的创建与使用
    MinGW 仿 linux 开发环境
    SICP 1.7-1.8 solution (Scheme)
    PHP 学生管理系统实现
    【2014最新】常用hosts集锦,分享给大家
    【Android快速入门3】布局简介及例子
    【Android快速入门2】拨号器的实现
    【Android快速入门1】目录结构及adb命令(以API19为例)
    基于深度及广度优先搜索的迷宫问题的演示
    基于HTML5的js构造爱心,动态时间校准
  • 原文地址:https://www.cnblogs.com/zy-style/p/4329243.html
Copyright © 2011-2022 走看看