常用操作EXCEL方法研究
本文主要提供了一个利用NPOI读取一个Excel的实际值的类。本文使用的NPOI版本为1.2.5,NPOI.dll可以到NPOI的官网http://npoi.codeplex.com/下载。
using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.IO;
using NPOI.HSSF.UserModel;
using NPOI.SS.UserModel;

namespace Common.Excel
{
    /// <summary>
    /// Reads the actual (stored or evaluated) cell values of an Excel 2003 (.xls)
    /// workbook into <see cref="DataSet"/> / <see cref="DataTable"/> objects using NPOI.
    /// Every column is created with type <see cref="string"/>.
    /// </summary>
    public static class NPOIHandler
    {
        /// <summary>
        /// Reads every sheet of the workbook, treating the first row of each sheet as the header.
        /// </summary>
        /// <param name="excelPath">Path of the .xls file.</param>
        /// <returns>A data set containing one table per sheet.</returns>
        public static DataSet ExcelToDataSet(string excelPath)
        {
            return ExcelToDataSet(excelPath, true);
        }

        /// <summary>
        /// Reads every sheet of the workbook.
        /// </summary>
        /// <param name="excelPath">Path of the .xls file.</param>
        /// <param name="firstRowAsHeader">
        /// <c>true</c> to use row 0 as column names; <c>false</c> to name columns F1, F2, ...
        /// </param>
        /// <returns>A data set containing one table per sheet.</returns>
        public static DataSet ExcelToDataSet(string excelPath, bool firstRowAsHeader)
        {
            int sheetCount;
            return ExcelToDataSet(excelPath, firstRowAsHeader, out sheetCount);
        }

        /// <summary>
        /// Reads every sheet of the workbook and reports how many sheets it contains.
        /// </summary>
        /// <param name="excelPath">Path of the .xls file.</param>
        /// <param name="firstRowAsHeader">
        /// <c>true</c> to use row 0 as column names; <c>false</c> to name columns F1, F2, ...
        /// </param>
        /// <param name="sheetCount">Receives the number of sheets in the workbook.</param>
        /// <returns>A data set containing one table per sheet, named after the sheet.</returns>
        public static DataSet ExcelToDataSet(string excelPath, bool firstRowAsHeader, out int sheetCount)
        {
            // The data set is handed to the caller, so it must NOT be wrapped in a using block:
            // that would return an already-disposed object.
            DataSet ds = new DataSet();
            using (FileStream fileStream = new FileStream(excelPath, FileMode.Open, FileAccess.Read))
            {
                HSSFWorkbook workbook = new HSSFWorkbook(fileStream);
                HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(workbook);
                sheetCount = workbook.NumberOfSheets;
                for (int i = 0; i < sheetCount; ++i)
                {
                    HSSFSheet sheet = workbook.GetSheetAt(i) as HSSFSheet;
                    ds.Tables.Add(ExcelToDataTable(sheet, evaluator, firstRowAsHeader));
                }
            }
            return ds;
        }

        /// <summary>
        /// Reads one sheet, treating its first row as the header.
        /// </summary>
        /// <param name="excelPath">Path of the .xls file.</param>
        /// <param name="sheetName">Name of the sheet to read.</param>
        /// <returns>A table named after the sheet.</returns>
        /// <exception cref="ArgumentException">The workbook has no sheet with that name.</exception>
        public static DataTable ExcelToDataTable(string excelPath, string sheetName)
        {
            return ExcelToDataTable(excelPath, sheetName, true);
        }

        /// <summary>
        /// Reads one sheet.
        /// </summary>
        /// <param name="excelPath">Path of the .xls file.</param>
        /// <param name="sheetName">Name of the sheet to read.</param>
        /// <param name="firstRowAsHeader">
        /// <c>true</c> to use row 0 as column names; <c>false</c> to name columns F1, F2, ...
        /// </param>
        /// <returns>A table named after the sheet.</returns>
        /// <exception cref="ArgumentException">The workbook has no sheet with that name.</exception>
        public static DataTable ExcelToDataTable(string excelPath, string sheetName, bool firstRowAsHeader)
        {
            using (FileStream fileStream = new FileStream(excelPath, FileMode.Open, FileAccess.Read))
            {
                HSSFWorkbook workbook = new HSSFWorkbook(fileStream);
                HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(workbook);
                HSSFSheet sheet = workbook.GetSheet(sheetName) as HSSFSheet;
                if (sheet == null)
                {
                    // Fail with a meaningful message instead of a NullReferenceException further down.
                    throw new ArgumentException(
                        string.Format("The sheet[{0}] was not found in [{1}].", sheetName, excelPath),
                        "sheetName");
                }
                return ExcelToDataTable(sheet, evaluator, firstRowAsHeader);
            }
        }

        /// <summary>Dispatches to the header or no-header conversion.</summary>
        private static DataTable ExcelToDataTable(HSSFSheet sheet, HSSFFormulaEvaluator evaluator, bool firstRowAsHeader)
        {
            if (firstRowAsHeader)
            {
                return ExcelToDataTableFirstRowAsHeader(sheet, evaluator);
            }
            return ExcelToDataTable(sheet, evaluator);
        }

        /// <summary>
        /// Converts a sheet using row 0 as column names; rows 1..LastRowNum become data rows.
        /// Missing, blank or duplicate header texts are replaced or suffixed so that every
        /// column gets a unique name.
        /// </summary>
        private static DataTable ExcelToDataTableFirstRowAsHeader(HSSFSheet sheet, HSSFFormulaEvaluator evaluator)
        {
            DataTable dt = new DataTable();
            HSSFRow firstRow = sheet.GetRow(0) as HSSFRow; // null when row 0 does not exist
            int cellCount = GetCellCount(sheet);
            for (int i = 0; i < cellCount; i++)
            {
                string headerName = GetHeaderName(firstRow, i, evaluator);
                dt.Columns.Add(MakeUniqueColumnName(dt, headerName), typeof(string));
            }

            for (int i = 1; i <= sheet.LastRowNum; i++)
            {
                HSSFRow row = sheet.GetRow(i) as HSSFRow;
                DataRow dr = dt.NewRow();
                FillDataRowByHSSFRow(row, evaluator, dr);
                dt.Rows.Add(dr);
            }

            dt.TableName = sheet.SheetName;
            return dt;
        }

        /// <summary>
        /// Converts a sheet without a header row. Columns are named F1, F2, ... (the same
        /// numbering used for the fallback names of the header variant). Row indexes of the
        /// table match row indexes of the sheet: missing rows become empty data rows.
        /// </summary>
        private static DataTable ExcelToDataTable(HSSFSheet sheet, HSSFFormulaEvaluator evaluator)
        {
            DataTable dt = new DataTable();

            // LastRowNum is 0 both for an empty sheet and for a sheet whose only row is row 0,
            // so the presence of row 0 has to be checked explicitly.
            bool hasRows = sheet.LastRowNum != 0 || sheet.GetRow(0) != null;
            if (hasRows)
            {
                int cellCount = GetCellCount(sheet);
                for (int i = 0; i < cellCount; i++)
                {
                    dt.Columns.Add(string.Format("F{0}", i + 1), typeof(string));
                }

                // GetRow returns null for rows that do not exist (including those before
                // FirstRowNum); FillDataRowByHSSFRow leaves such rows empty.
                for (int i = 0; i <= sheet.LastRowNum; i++)
                {
                    HSSFRow row = sheet.GetRow(i) as HSSFRow;
                    DataRow dr = dt.NewRow();
                    FillDataRowByHSSFRow(row, evaluator, dr);
                    dt.Rows.Add(dr);
                }
            }

            dt.TableName = sheet.SheetName;
            return dt;
        }

        /// <summary>
        /// Returns the text of the header cell at <paramref name="columnIndex"/>, or the
        /// fallback name "F{columnIndex + 1}" when the row or cell is missing or blank.
        /// </summary>
        private static string GetHeaderName(HSSFRow headerRow, int columnIndex, HSSFFormulaEvaluator evaluator)
        {
            string fallback = string.Format("F{0}", columnIndex + 1);
            if (headerRow == null)
            {
                return fallback;
            }

            HSSFCell cell = headerRow.GetCell(columnIndex) as HSSFCell;
            if (cell == null)
            {
                return fallback;
            }

            string text;
            switch (cell.CellType)
            {
                case CellType.STRING:
                    text = cell.StringCellValue;
                    break;
                case CellType.FORMULA:
                    // Same treatment as data cells: use the evaluated result, not the formula text.
                    text = evaluator.EvaluateInCell(cell).ToString();
                    break;
                default:
                    // StringCellValue is not valid for numeric/boolean/error cells.
                    text = cell.ToString();
                    break;
            }

            if (text == null || text.Trim().Length == 0)
            {
                return fallback;
            }
            return text;
        }

        /// <summary>
        /// Appends "_1", "_2", ... to <paramref name="name"/> until it does not clash with an
        /// existing column of <paramref name="dt"/>.
        /// </summary>
        private static string MakeUniqueColumnName(DataTable dt, string name)
        {
            string candidate = name;
            int suffix = 1;
            while (dt.Columns.Contains(candidate))
            {
                candidate = string.Format("{0}_{1}", name, suffix);
                suffix++;
            }
            return candidate;
        }

        /// <summary>
        /// Copies the cells of <paramref name="row"/> into <paramref name="dr"/>, one cell per
        /// table column. A null row or null cell leaves the corresponding values untouched.
        /// </summary>
        /// <exception cref="NotSupportedException">A cell has an unknown cell type.</exception>
        private static void FillDataRowByHSSFRow(HSSFRow row, HSSFFormulaEvaluator evaluator, DataRow dr)
        {
            if (row == null)
            {
                return;
            }

            for (int j = 0; j < dr.Table.Columns.Count; j++)
            {
                HSSFCell cell = row.GetCell(j) as HSSFCell;
                if (cell == null)
                {
                    continue;
                }

                switch (cell.CellType)
                {
                    case CellType.BLANK:
                        dr[j] = DBNull.Value;
                        break;
                    case CellType.BOOLEAN:
                        dr[j] = cell.BooleanCellValue;
                        break;
                    case CellType.NUMERIC:
                        // Dates are stored as numbers; the cell format tells them apart.
                        if (DateUtil.IsCellDateFormatted(cell))
                        {
                            dr[j] = cell.DateCellValue;
                        }
                        else
                        {
                            dr[j] = cell.NumericCellValue;
                        }
                        break;
                    case CellType.STRING:
                        dr[j] = cell.StringCellValue;
                        break;
                    case CellType.ERROR:
                        dr[j] = cell.ErrorCellValue;
                        break;
                    case CellType.FORMULA:
                        // Replaces the formula in the in-memory cell with its evaluated result.
                        cell = evaluator.EvaluateInCell(cell) as HSSFCell;
                        dr[j] = cell.ToString();
                        break;
                    default:
                        throw new NotSupportedException(string.Format("Catched unhandle CellType[{0}]", cell.CellType));
                }
            }
        }

        /// <summary>
        /// Returns the largest LastCellNum over all existing rows of the sheet, i.e. the number
        /// of columns needed to hold the widest row; 0 when the sheet has no cells.
        /// </summary>
        private static int GetCellCount(HSSFSheet sheet)
        {
            int cellCount = 0;
            for (int i = sheet.FirstRowNum; i <= sheet.LastRowNum; ++i)
            {
                HSSFRow row = sheet.GetRow(i) as HSSFRow;
                if (row != null && row.LastCellNum > cellCount)
                {
                    cellCount = row.LastCellNum;
                }
            }
            return cellCount;
        }
    }
}
本文主要提供了一个利用Oledb读取一个Excel的类。
using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Data.OleDb;
using System.IO;

namespace Common.Excel
{
    // Connection string notes (translated from the original author's comments):
    // HDR=Yes: the first row supplies the DataTable column names; each column's data type is
    //          inferred from the data in that column.
    // HDR=No:  every row is treated as data; all values are strings and empty cells are "".
    // IMEX=0:  export mode - the workbook is opened for writing only.
    // IMEX=1:  import mode - the workbook is opened for reading only.
    // IMEX=2:  linked mode - the workbook can be both read and written.
    // NOTE(review): the HDR/IMEX descriptions above are the original author's; confirm against
    // the provider documentation before relying on them.

    /// <summary>
    /// Reads Excel workbooks (.xls via Jet 4.0, .xlsx via ACE 12.0) into
    /// <see cref="DataSet"/> / <see cref="DataTable"/> objects through OLE DB.
    /// </summary>
    public static class OleDbHandler
    {
        /// <summary>
        /// Returns every table name the OLE DB provider reports for the workbook.
        /// Worksheet names end with "$" (or "$'" when the provider quotes them).
        /// </summary>
        /// <param name="excelPath">Path of the .xls or .xlsx file.</param>
        /// <exception cref="NotSupportedException">The file extension is not .xls or .xlsx.</exception>
        public static string[] GetSheetNames(string excelPath)
        {
            string connectionStr = GetConnectionStr(excelPath, true);
            return GetSheetNamesByOleDb(connectionStr);
        }

        /// <summary>
        /// Reads every worksheet of the workbook, treating the first row as the header.
        /// </summary>
        /// <param name="excelPath">Path of the .xls or .xlsx file.</param>
        /// <returns>A data set containing one table per worksheet.</returns>
        public static DataSet ExcelToDataSet(string excelPath)
        {
            return ExcelToDataSet(excelPath, true);
        }

        /// <summary>
        /// Reads every worksheet of the workbook.
        /// </summary>
        /// <param name="excelPath">Path of the .xls or .xlsx file.</param>
        /// <param name="firstRowAsHeader"><c>true</c> for HDR=Yes, <c>false</c> for HDR=No.</param>
        /// <returns>A data set containing one table per worksheet.</returns>
        /// <exception cref="NotSupportedException">The file extension is not .xls or .xlsx.</exception>
        public static DataSet ExcelToDataSet(string excelPath, bool firstRowAsHeader)
        {
            string connectionStr = GetConnectionStr(excelPath, firstRowAsHeader);
            string[] sheetNames = GetSheetNamesByOleDb(connectionStr);

            // The data set is returned to the caller, so it must not be disposed here.
            DataSet ds = new DataSet();
            foreach (string sheetName in sheetNames)
            {
                // Skip the extra tables the provider reports besides the real worksheets.
                if (IsWorksheetName(sheetName))
                {
                    ds.Tables.Add(ExcelToDataTableByOleDb(connectionStr, sheetName));
                }
            }
            return ds;
        }

        /// <summary>
        /// Reads one worksheet, treating its first row as the header.
        /// </summary>
        /// <param name="excelPath">Path of the .xls or .xlsx file.</param>
        /// <param name="sheetName">Table name as reported by the provider, e.g. "Sheet1$".</param>
        /// <returns>A table named <paramref name="sheetName"/>.</returns>
        public static DataTable ExcelToDataTable(string excelPath, string sheetName)
        {
            return ExcelToDataTable(excelPath, sheetName, true);
        }

        /// <summary>
        /// Reads one worksheet.
        /// </summary>
        /// <param name="excelPath">Path of the .xls or .xlsx file.</param>
        /// <param name="sheetName">Table name as reported by the provider, e.g. "Sheet1$".</param>
        /// <param name="firstRowAsHeader"><c>true</c> for HDR=Yes, <c>false</c> for HDR=No.</param>
        /// <returns>A table named <paramref name="sheetName"/>.</returns>
        /// <exception cref="ArgumentException"><paramref name="sheetName"/> is null or empty.</exception>
        /// <exception cref="NotSupportedException">The file extension is not .xls or .xlsx.</exception>
        public static DataTable ExcelToDataTable(string excelPath, string sheetName, bool firstRowAsHeader)
        {
            if (string.IsNullOrEmpty(sheetName))
            {
                throw new ArgumentException("The sheet name must not be null or empty.", "sheetName");
            }

            string connectionStr = GetConnectionStr(excelPath, firstRowAsHeader);
            return ExcelToDataTableByOleDb(connectionStr, sheetName);
        }

        /// <summary>
        /// Tells real worksheets apart from the other tables the provider lists.
        /// Worksheet table names end with "$"; names containing spaces or special characters
        /// are reported quoted and therefore end with "$'".
        /// </summary>
        private static bool IsWorksheetName(string tableName)
        {
            return tableName.EndsWith("$", StringComparison.Ordinal)
                || tableName.EndsWith("$'", StringComparison.Ordinal);
        }

        /// <summary>Runs "SELECT * FROM [sheetName]" and returns the result as a table.</summary>
        private static DataTable ExcelToDataTableByOleDb(string connectionStr, string sheetName)
        {
            // Returned to the caller, so not wrapped in a using block.
            DataTable dt = new DataTable();
            using (OleDbConnection conn = new OleDbConnection(connectionStr))
            using (OleDbDataAdapter da = new OleDbDataAdapter(string.Format("SELECT * FROM [{0}]", sheetName), conn))
            {
                // Fill opens the connection if it is closed and closes it again afterwards.
                da.Fill(dt);
            }
            dt.TableName = sheetName;
            return dt;
        }

        /// <summary>Reads the TABLE_NAME column of the provider's table schema.</summary>
        private static string[] GetSheetNamesByOleDb(string connectionStr)
        {
            using (OleDbConnection conn = new OleDbConnection(connectionStr))
            {
                conn.Open();
                using (DataTable schema = conn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null))
                {
                    string[] sheetNames = new string[schema.Rows.Count];
                    for (int i = 0; i < schema.Rows.Count; ++i)
                    {
                        sheetNames[i] = schema.Rows[i]["TABLE_NAME"].ToString();
                    }
                    return sheetNames;
                }
            }
        }

        /// <summary>
        /// Builds the OLE DB connection string for the workbook, choosing the provider from the
        /// file extension (.xls: Jet 4.0 / "Excel 8.0"; .xlsx: ACE 12.0 / "Excel 12.0").
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="excelPath"/> is null.</exception>
        /// <exception cref="NotSupportedException">The file extension is not .xls or .xlsx.</exception>
        private static string GetConnectionStr(string excelPath, bool firstRowAsHeader)
        {
            if (excelPath == null)
            {
                throw new ArgumentNullException("excelPath");
            }

            string suffix = Path.GetExtension(excelPath);
            string excelVersion;
            string provider;
            // Invariant lower-casing: the extension is an identifier, not linguistic text.
            switch (suffix.ToLowerInvariant())
            {
                case ".xls":
                    provider = "Microsoft.Jet.OLEDB.4.0";
                    excelVersion = "Excel 8.0";
                    break;
                case ".xlsx":
                    provider = "Microsoft.Ace.OleDb.12.0";
                    excelVersion = "Excel 12.0";
                    break;
                default:
                    throw new NotSupportedException(string.Format("The file extension[{0}] is not supported.", suffix));
            }

            // The builder quotes values as needed, so paths containing ';' or quote characters
            // cannot break or alter the connection string.
            OleDbConnectionStringBuilder builder = new OleDbConnectionStringBuilder();
            builder.Provider = provider;
            builder.DataSource = excelPath;
            builder["Extended Properties"] = string.Format(
                "{0};HDR={1};IMEX=1", excelVersion, firstRowAsHeader ? "Yes" : "No");
            return builder.ConnectionString;
        }
    }
}
利用EPPlus读取Excel
笔者主要在一家金融公司从事开发工作,需要长期与数据打交道,因此,经常逃不掉关于Excel的操作。在此,笔者想对比下当前主流的读取Excel的技术。笔者认为当前比较主流的读取Excel技术包括COM组件,OleDb,和NPOI。 这里我们设定情景为将Excel读成DataSet或者DataTable。
#1 COM组件
优点:
(1)能读取各种版本的Excel,包括2003,2007,2010
(2)能够较好的读取Excel的显示值和实际值(包括存在公式的情况)
缺点:
(1)运行的机器需要安装相应版本的Excel
(2)使用后不能很好的完成资源释放工作,很多时候需要通过杀掉Excel进程或者根据进程号去杀掉对应的进程来实现资源释放
#2 OleDb
优点:
(1)能读取各种版本的Excel,包括2003,2007,2010
(2)运行的机器不需要安装相应版本的Excel
(3)没有资源释放的忧虑
缺点:
(1)不能很好地处理Excel的显示值和实际值,尤其是不能很好地处理公式
(2)列类型的判断逻辑不合理,当指定了首行作为头(header)的时候,Oledb会根据该列的前若干行数据的类型来判断该列的数据类型,这会导致数据丢失。例如某列前n行都为int,列的数据类型被判断为int,那么第n+1行之后无法转换为int的数据都会被清空。
(3)Microsoft.Jet.OLEDB.4.0 不支持64位的操作系统,在64位的操作系统需要特殊处理。
#3 NPOI
优点:
(1)运行的机器不需要安装相应版本的Excel
(2)能很好的读取Excel的实际值,包括公式的处理,堪称完美
(3)没有资源释放的忧虑
缺点:
(1)现时最新版本的NPOI版本只支持Excel2003,其官方微博宣称会在2012年底实现对Excel2007的支持,我们还是耐心等待一会吧
(2)不能很好地处理Excel的显示值,笔者试过跟踪代码,都未能发现一个和显示值完全一致的方法或者属性,这让我觉得NPOI不能很好地处理Excel的显示值
#4 EPPlus
优点:
(1)运行的机器不需要安装相应版本的Excel
(2)支持.xlsx 2007格式
缺点:
(1)不支持.xls 2003格式
//TODO:遗漏,细心的读者可能会发现,这里没有谈到各种技术对读取Excel里的图片的处理情况。现今笔者确实没有对这方面进行相关的研究,但后续我会添加相关的研究结论。我提供了一个OleDb和一个NPOI读取Excel的代码供大家参考研究。OleDb的请查看利用Oledb读取Excel,NPOI的请查看利用NPOI读取Excel,至于COM组件读取Excel的代码笔者也有写过,只是碍于写得过于粗糙凌乱,故暂不奉上,待归纳封装后再提供给诸位读者。
本文主要内容摘于 http://www.cnblogs.com/Erik_Xu/archive/2012/06/08.html