zoukankan      html  css  js  c++  java
  • 使用NPOI读取Word、Excel文档内容

    使用NPOI读取Excel的例子很多,读取Word的例子不多。

    Excel的解析方式有多种,可以使用ODBC查询,把Excel作为一个数据集对待。也可以使用文档结构模型的方式进行解析,即解析Workbook(工作簿)、Sheet、Row、Column。

    Word的解析比较复杂,因为Word的文档结构模型定义较为复杂。解析Word或者Excel,关键是理解Word、Excel的文档对象模型。

    Word、Excel文档对象模型的解析,可以通过COM接口调用,此类方式使用较广。(可以录制宏代码,然后替换为对应的语言)

    也可以使用XML模型解析,尤其是对于2007、2010版本的文档的解析。

      1 using NPOI.POIFS.FileSystem;
      2 using NPOI.SS.UserModel;
      3 using NPOI.XSSF.UserModel;
      4 using NPOI.XWPF.UserModel;
      5 using System;
      6 using System.Collections.Generic;
      7 using System.Configuration;
      8 using System.IO;
      9 using System.Text;
     10 
     11 namespace eyuan
     12 {
     13     public static class NOPIHandler
     14     {
     /// <summary>
     /// Reads every sheet of an .xlsx workbook into nested lists of cell text:
     /// workbook -> sheets -> rows -> cell strings.
     /// </summary>
     /// <param name="fileName">Path of the Excel (.xlsx) file to open.</param>
     /// <returns>
     /// One list per sheet, each holding one list per row index (0..LastRowNum),
     /// each holding the <c>ToString()</c> text of the cells stored in that row.
     /// Returns an empty list when the file cannot be opened (the failure is logged).
     /// </returns>
     public static List<List<List<string>>> ReadExcel(string fileName)
     {
         List<List<List<string>>> workBookContent = new List<List<List<string>>>();

         // Open the workbook. The stream is only needed while the workbook is being parsed.
         XSSFWorkbook workbook;
         try
         {
             using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
             {
                 workbook = new XSSFWorkbook(file);
             }
         }
         catch (Exception e)
         {
             LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
             // Without a workbook there is nothing to read; returning here avoids the
             // NullReferenceException the code below would otherwise raise.
             return workBookContent;
         }

         // Walk the sheets (sheet indexes are 0-based).
         int sheetsCount = workbook.NumberOfSheets;
         for (int i = 0; i < sheetsCount; i++)
         {
             ISheet sheet = workbook.GetSheetAt(i);
             List<List<string>> sheetContent = new List<List<string>>();

             // LastRowNum cannot distinguish "no rows" from "only row 0", so guard on the
             // physical row count before walking the logical row indexes.
             if (sheet.PhysicalNumberOfRows > 0)
             {
                 // Iterate logical row indexes up to LastRowNum rather than the physical row
                 // count: with gaps in the sheet the physical count is smaller than the last
                 // index, and GetRow returns null for rows that were never created.
                 int lastRowIndex = sheet.LastRowNum;
                 for (int j = 0; j <= lastRowIndex; j++)
                 {
                     IRow row = sheet.GetRow(j);
                     List<string> rowContent = new List<string>();
                     if (row != null)
                     {
                         // Only the cells actually stored in the row are visited (rows may
                         // have different cell counts); missing columns are not padded.
                         foreach (ICell cell in row.Cells)
                         {
                             rowContent.Add(cell == null ? "NIL" : cell.ToString());
                         }
                     }
                     // An undefined row is kept as an empty list so row positions stay aligned.
                     sheetContent.Add(rowContent);
                 }
             }

             workBookContent.Add(sheetContent);
         }

         return workBookContent;
     }
     75 
     /// <summary>
     /// Flattens the content returned by <c>ReadExcel</c> into a single string,
     /// using separators taken from the application settings.
     /// </summary>
     /// <param name="fileName">Path of the Excel file to read.</param>
     /// <returns>
     /// The cell texts of each row joined by the "ExcelCellSeparator" setting, each row
     /// followed by "ExcelRowSeparator" and each sheet followed by "ExcelSheetSeparator".
     /// </returns>
     public static string ReadExcelText(string fileName)
     {
         // Separators are configurable through appSettings.
         string cellSeparator = ConfigurationManager.AppSettings["ExcelCellSeparator"];
         string rowSeparator = ConfigurationManager.AppSettings["ExcelRowSeparator"];
         string sheetSeparator = ConfigurationManager.AppSettings["ExcelSheetSeparator"];

         List<List<List<string>>> workbookRows = ReadExcel(fileName);
         StringBuilder text = new StringBuilder();

         // Each sheet of the workbook in turn.
         foreach (List<List<string>> sheetRows in workbookRows)
         {
             // Each row of the current sheet in turn.
             foreach (List<string> cells in sheetRows)
             {
                 string joinedCells = string.Join(cellSeparator, cells.ToArray());
                 text.Append(joinedCells);
                 text.Append(rowSeparator);
             }
             text.Append(sheetSeparator);
         }

         return text.ToString();
     }
    110 
    /// <summary>
    /// Extracts the text of a Word (.docx) document: optionally headers, footers,
    /// tables and embedded images, followed by the paragraph text.
    /// </summary>
    /// <remarks>
    /// Behaviour is driven by appSettings: the "CaptureWordHeader", "CaptureWordFooter",
    /// "CaptureWordTable" and "CaptureWordImage" sections are emitted only when the
    /// setting equals "true"; "WordTableCellSeparator", "WordTableRowSeparator" and
    /// "WordTableSeparator" delimit table output; "CaptureWordImageFileName" is the
    /// format string used to build the path each extracted image is written to.
    /// </remarks>
    /// <param name="fileName">Path of the Word document to open.</param>
    /// <returns>
    /// The captured text, each section wrapped in "Capture ... Begin" / "Capture ... End"
    /// marker lines. Returns an empty string when the file cannot be opened
    /// (the failure is logged).
    /// </returns>
    public static string ReadWordText(string fileName)
    {
        string tableCellSeparator = ConfigurationManager.AppSettings["WordTableCellSeparator"];
        string tableRowSeparator = ConfigurationManager.AppSettings["WordTableRowSeparator"];
        string tableSeparator = ConfigurationManager.AppSettings["WordTableSeparator"];
        // Switches selecting which optional sections are captured.
        string captureHeader = ConfigurationManager.AppSettings["CaptureWordHeader"];
        string captureFooter = ConfigurationManager.AppSettings["CaptureWordFooter"];
        string captureTable = ConfigurationManager.AppSettings["CaptureWordTable"];
        string captureImage = ConfigurationManager.AppSettings["CaptureWordImage"];
        // Format string for the output path of extracted images.
        string imageFileNameFormat = ConfigurationManager.AppSettings["CaptureWordImageFileName"];

        StringBuilder sbFileText = new StringBuilder();

        #region Open the document
        XWPFDocument document;
        try
        {
            using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
            {
                document = new XWPFDocument(file);
            }
        }
        catch (Exception e)
        {
            LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
            // Without a document there is nothing to extract; returning here avoids the
            // NullReferenceException the code below would otherwise raise.
            return string.Empty;
        }
        #endregion

        #region Headers and footers
        // Headers
        if (captureHeader == "true")
        {
            sbFileText.AppendLine("Capture Header Begin");
            foreach (XWPFHeader xwpfHeader in document.HeaderList)
            {
                sbFileText.AppendLine(xwpfHeader.Text);
            }
            sbFileText.AppendLine("Capture Header End");
        }
        // Footers
        if (captureFooter == "true")
        {
            sbFileText.AppendLine("Capture Footer Begin");
            foreach (XWPFFooter xwpfFooter in document.FooterList)
            {
                sbFileText.AppendLine(xwpfFooter.Text);
            }
            sbFileText.AppendLine("Capture Footer End");
        }
        #endregion

        #region Tables
        if (captureTable == "true")
        {
            sbFileText.AppendLine("Capture Table Begin");
            foreach (XWPFTable table in document.Tables)
            {
                // Every cell is followed by the cell separator, every row by the row
                // separator and every table by the table separator.
                foreach (XWPFTableRow row in table.Rows)
                {
                    foreach (XWPFTableCell cell in row.GetTableCells())
                    {
                        sbFileText.Append(cell.GetText());
                        sbFileText.Append(tableCellSeparator);
                    }

                    sbFileText.Append(tableRowSeparator);
                }
                sbFileText.Append(tableSeparator);
            }
            sbFileText.AppendLine("Capture Table End");
        }
        #endregion

        #region Images
        if (captureImage == "true")
        {
            sbFileText.AppendLine("Capture Image Begin");
            foreach (XWPFPictureData pictureData in document.AllPictures)
            {
                string picExtName = pictureData.suggestFileExtension();
                string picFileName = pictureData.GetFileName();
                byte[] picFileContent = pictureData.GetData();

                // A GUID prefix keeps the names of extracted images unique.
                // NOTE(review): GetFileName() may already include the extension, in which
                // case the name ends with it twice - confirm before changing the format.
                string picTempName = string.Format(
                    imageFileNameFormat,
                    Guid.NewGuid().ToString() + "_" + picFileName + "." + picExtName);

                // Creates (or overwrites) the file and closes it once the bytes are written.
                File.WriteAllBytes(picTempName, picFileContent);

                // Record where the image was saved.
                sbFileText.AppendLine(picTempName);
            }
            sbFileText.AppendLine("Capture Image End");
        }
        #endregion

        // Body paragraphs (always captured).
        sbFileText.AppendLine("Capture Paragraph Begin");
        foreach (XWPFParagraph paragraph in document.Paragraphs)
        {
            sbFileText.AppendLine(paragraph.ParagraphText);
        }
        sbFileText.AppendLine("Capture Paragraph End");

        return sbFileText.ToString();
    }
    232 
    233 
    234     }
    235 }
  • 相关阅读:
    基于微信小程序的票价和时间选择以及计算总价
    基于Echarts的股票K线图展示
    基于Echarts的中国地图数据展示
    微信公众号网页授权登录获取用户基本信息
    springboot+mybatis+maven角色权限框架
    java服务端微信小程序支付
    推理 —— 猜帽子颜色
    Java 容器的使用及数组、List、Set 的相互转换
    构建工具 —— Groovy 与 Gradle
    效率生产力工具 —— idea 插件
  • 原文地址:https://www.cnblogs.com/mahongbiao/p/3760878.html
Copyright © 2011-2022 走看看