zoukankan      html  css  js  c++  java
  • c# 读取excel数据的两种方法(转)

    转载自:http://developer.51cto.com/art/201302/380622.htm

    方法一:OleDb: 用这种方法读取Excel速度还是非常的快的,但这种方式读取数据的时候不太灵活,不过可以在 DataTable 中对数据进行一些删减修改。

    优点:读取方式简单、读取速度快

    缺点:除了读取过程不太灵活之外,这种读取方式还有个弊端:当Excel数据量很大时会非常占用内存,内存不够时会抛出内存溢出的异常。

    不过一般情况下还是非常不错的。

    /// <summary>
    /// Asks the user to pick an Excel workbook and loads the worksheet named
    /// "Sheet1" into a <see cref="DataTable"/> through an OLE DB provider.
    /// </summary>
    /// <param name="hasTitle">
    /// True when the first row of the sheet holds column headers (sent to the
    /// provider as HDR=Yes); false sends HDR=NO.
    /// </param>
    /// <returns>
    /// The first table produced by the query, or null when the dialog is
    /// cancelled, the chosen file has no extension, or no table was produced.
    /// </returns>
    DataTable GetDataFromExcelByConn(bool hasTitle = false)
    {
        string filePath;
        using (OpenFileDialog openFile = new OpenFileDialog())
        {
            openFile.Filter = "Excel(*.xlsx)|*.xlsx|Excel(*.xls)|*.xls";
            openFile.InitialDirectory = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
            openFile.Multiselect = false;
            if (openFile.ShowDialog() == DialogResult.Cancel) return null;
            filePath = openFile.FileName;
        }

        string fileType = System.IO.Path.GetExtension(filePath);
        if (string.IsNullOrEmpty(fileType)) return null;

        // Compare case-insensitively so that "BOOK.XLS" is still treated as a legacy workbook.
        bool isLegacyXls = string.Equals(fileType, ".xls", StringComparison.OrdinalIgnoreCase);

        // The Jet provider only exists as version 4.0 and only reads .xls;
        // .xlsx requires the ACE 12.0 provider.
        // The builder takes care of quoting values that contain ';' or quotes
        // (the extended properties and, potentially, the file path).
        OleDbConnectionStringBuilder builder = new OleDbConnectionStringBuilder();
        builder.Provider = isLegacyXls ? "Microsoft.Jet.OLEDB.4.0" : "Microsoft.ACE.OLEDB.12.0";
        builder.DataSource = filePath;
        builder["Extended Properties"] = string.Format(
            "Excel {0}.0;HDR={1};IMEX=1;",
            isLegacyXls ? 8 : 12,
            hasTitle ? "Yes" : "NO");

        string strCom = " SELECT * FROM [Sheet1$]";
        using (OleDbConnection myConn = new OleDbConnection(builder.ConnectionString))
        using (OleDbDataAdapter myCommand = new OleDbDataAdapter(strCom, myConn))
        {
            // The DataSet is intentionally not disposed: the table handed back
            // to the caller still belongs to it.
            DataSet ds = new DataSet();
            myConn.Open();
            myCommand.Fill(ds);
            return ds.Tables.Count > 0 ? ds.Tables[0] : null;
        }
    }

    方法二:Com组件的方式读取Excel 

    这种方式需要先引用 Microsoft.Office.Interop.Excel 。首先说下这种方式的优缺点 

    优点:可以非常灵活的读取Excel中的数据 

    缺点:如果是Web站点部署在IIS上时,还需要服务器机子已安装了Excel,有时候还需要配置IIS权限。最重要的一点:因为是基于单元格方式读取的,所以读取速度很慢(曾做过试验,直接读取千行、200多列的文件,耗时15分钟。即使采用多线程分段读取来提高CPU的利用率也需要8分钟。PS:CPU I3) 

    需要读取大文件的童鞋们慎重。。。

    /// <summary>
    /// Asks the user to pick an Excel workbook and copies the displayed text of
    /// every cell of its first worksheet into a <see cref="DataTable"/> using
    /// Excel COM automation.
    /// </summary>
    /// <param name="hasTitle">
    /// True to take column names from row 1 (blank headers fall back to
    /// "column&lt;index&gt;") and start reading data at row 2; false to name
    /// the columns "column0", "column1", ... and read data from row 1.
    /// </param>
    /// <returns>
    /// A table whose columns are all of type string, or null when the dialog is
    /// cancelled, the first worksheet is missing, or reading fails.
    /// </returns>
    DataTable GetDataFromExcelByCom(bool hasTitle = false)
    {
        string excelFilePath;
        using (OpenFileDialog openFile = new OpenFileDialog())
        {
            openFile.Filter = "Excel(*.xlsx)|*.xlsx|Excel(*.xls)|*.xls";
            openFile.InitialDirectory = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
            openFile.Multiselect = false;
            if (openFile.ShowDialog() == DialogResult.Cancel) return null;
            excelFilePath = openFile.FileName;
        }

        object oMissiong = System.Reflection.Missing.Value;
        Excel.Application app = new Excel.Application();
        Excel.Workbook workbook = null;
        Excel.Sheets sheets = null;
        Excel.Worksheet worksheet = null;
        DataTable dt = new DataTable();

        try
        {
            workbook = app.Workbooks.Open(excelFilePath, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong,
                oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong);
            sheets = workbook.Worksheets;

            // Read the first worksheet into the DataTable.
            worksheet = (Excel.Worksheet)sheets.get_Item(1);
            if (worksheet == null) return null;

            // UsedRange need not start at A1. Cells are addressed by absolute
            // sheet index below, so convert its size into the absolute index
            // of the last used row/column; otherwise trailing data is cut off.
            Excel.Range used = worksheet.UsedRange;
            int iRowCount = used.Row + used.Rows.Count - 1;
            int iColCount = used.Column + used.Columns.Count - 1;

            // Build the column headers.
            for (int i = 0; i < iColCount; i++)
            {
                var name = "column" + i;
                if (hasTitle)
                {
                    var txt = ((Excel.Range)worksheet.Cells[1, i + 1]).Text.ToString();
                    if (!string.IsNullOrWhiteSpace(txt)) name = txt;
                }
                // DataTable rejects duplicate column names, so keep suffixing until unique.
                while (dt.Columns.Contains(name)) name = name + "_1";
                dt.Columns.Add(new DataColumn(name, typeof(string)));
            }

            // Copy the data rows cell by cell.
            int rowIdx = hasTitle ? 2 : 1;
            for (int iRow = rowIdx; iRow <= iRowCount; iRow++)
            {
                DataRow dr = dt.NewRow();
                for (int iCol = 1; iCol <= iColCount; iCol++)
                {
                    Excel.Range range = (Excel.Range)worksheet.Cells[iRow, iCol];
                    dr[iCol - 1] = (range.Value2 == null) ? "" : range.Text.ToString();
                }
                dt.Rows.Add(dr);
            }
            return dt;
        }
        catch (Exception ex)
        {
            // Callers rely on null to signal failure; record the cause instead
            // of discarding it silently.
            System.Diagnostics.Trace.TraceError("GetDataFromExcelByCom failed: " + ex);
            return null;
        }
        finally
        {
            // workbook/sheets/worksheet stay null when Open (or a later step)
            // fails; guard each one so cleanup itself cannot throw and Excel
            // is always asked to quit.
            if (worksheet != null) System.Runtime.InteropServices.Marshal.ReleaseComObject(worksheet);
            if (sheets != null) System.Runtime.InteropServices.Marshal.ReleaseComObject(sheets);
            if (workbook != null)
            {
                workbook.Close(false, oMissiong, oMissiong);
                System.Runtime.InteropServices.Marshal.ReleaseComObject(workbook);
                workbook = null;
            }
            app.Workbooks.Close();
            app.Quit();
            System.Runtime.InteropServices.Marshal.ReleaseComObject(app);
            app = null;
        }
    }

    原文的方法二还提供了多线程处理数据的代码,一并复制到此(此处出现了一个SheetOptions的类型,无法考证其来源,如果知晓,请留言,谢谢。):

    /// <summary>
    /// Reads the first worksheet of an Excel workbook into a DataTable through
    /// COM automation. Row 1 supplies the column names. When the sheet has more
    /// than 500 data rows, four worker threads each read one tenth of the rows
    /// while the calling thread reads the remainder.
    /// </summary>
    /// <param name="excelFilePath">Path of the workbook to open.</param>
    /// <returns>
    /// A table whose columns are all of type string and whose rows follow the
    /// sheet order, or null when the first worksheet is missing or reading fails.
    /// </returns>
    /// <remarks>
    /// NOTE(review): SheetOptions is not defined in this file. It is assumed to
    /// read the inclusive row range [startRow, endRow] of the worksheet into the
    /// supplied table when SheetToDataTable runs - confirm against its source.
    /// </remarks>
    public System.Data.DataTable ThreadReadExcel(string excelFilePath)
    {
        const int WorkerCount = 4;

        object oMissiong = System.Reflection.Missing.Value;
        Excel.Application app = new Excel.Application();
        Excel.Workbook workbook = null;
        Excel.Sheets sheets = null;
        Excel.Worksheet worksheet = null;
        System.Data.DataTable dt = new System.Data.DataTable();

        try
        {
            workbook = app.Workbooks.Open(excelFilePath, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong,
                oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong);

            sheets = workbook.Worksheets;
            worksheet = (Excel.Worksheet)sheets.get_Item(1); // first worksheet
            if (worksheet == null)
                return null;

            // UsedRange need not start at A1. Cells are addressed by absolute
            // sheet index below, so convert its size into the absolute index
            // of the last used row/column; otherwise trailing data is cut off.
            Excel.Range used = worksheet.UsedRange;
            int iRowCount = used.Row + used.Rows.Count - 1;
            int iColCount = used.Column + used.Columns.Count - 1;

            // Build the columns from row 1.
            for (int columnId = 1; columnId <= iColCount; columnId++)
            {
                Excel.Range headerCell = (Excel.Range)worksheet.Cells[1, columnId];
                string strNewColumnName = headerCell.Text.ToString().Trim();
                if (strNewColumnName.Length == 0) strNewColumnName = "_1";

                // A single pass over the existing columns can still leave a
                // clash (e.g. "a_1", "a", "a"); keep suffixing until unique.
                while (dt.Columns.Contains(strNewColumnName))
                    strNewColumnName = strNewColumnName + "_1";

                dt.Columns.Add(new DataColumn(strNewColumnName, typeof(string)));
            }

            // Small sheets are read on the calling thread only.
            if (iRowCount - 1 <= 500)
            {
                ReadRowsInto(worksheet, dt, 2, iRowCount, iColCount);
                return dt;
            }

            // Each worker takes one tenth of the data rows; worker w covers
            // sheet rows [b2 * w + 2, b2 * (w + 1) + 1].
            int b2 = (iRowCount - 1) / 10;
            System.Data.DataTable[] parts = new System.Data.DataTable[WorkerCount];
            Thread[] workers = new Thread[WorkerCount];
            for (int w = 0; w < WorkerCount; w++)
            {
                parts[w] = dt.Clone();
                SheetOptions options = new SheetOptions(worksheet, iColCount, b2 * w + 2, b2 * (w + 1) + 1, parts[w]);
                workers[w] = new Thread(new ThreadStart(options.SheetToDataTable));
                workers[w].Start();
            }

            try
            {
                // The calling thread reads everything after the workers' ranges.
                ReadRowsInto(worksheet, dt, b2 * WorkerCount + 2, iRowCount, iColCount);
            }
            finally
            {
                // Always wait for the workers, even if the read above failed,
                // so none of them is still using the worksheet when the outer
                // finally closes the workbook.
                foreach (Thread worker in workers)
                    worker.Join();
            }

            // Merge in sheet order: worker 1..4 first, then the tail read by
            // the calling thread (which holds the LAST rows of the sheet).
            System.Data.DataTable result = parts[0];
            for (int w = 1; w < WorkerCount; w++)
            {
                foreach (DataRow dr in parts[w].Rows)
                    result.Rows.Add(dr.ItemArray);
                parts[w].Clear();
                parts[w].Dispose();
            }
            foreach (DataRow dr in dt.Rows)
                result.Rows.Add(dr.ItemArray);
            dt.Clear();
            dt.Dispose();

            return result;
        }
        catch (Exception ex)
        {
            // Callers rely on null to signal failure; record the cause instead
            // of discarding it silently.
            System.Diagnostics.Trace.TraceError("ThreadReadExcel failed: " + ex);
            return null;
        }
        finally
        {
            // workbook/sheets/worksheet stay null when Open (or a later step)
            // fails; guard each one so cleanup itself cannot throw and Excel
            // is always asked to quit.
            if (worksheet != null) System.Runtime.InteropServices.Marshal.ReleaseComObject(worksheet);
            if (workbook != null)
            {
                workbook.Close(false, oMissiong, oMissiong);
                System.Runtime.InteropServices.Marshal.ReleaseComObject(workbook);
                workbook = null;
            }
            if (sheets != null) System.Runtime.InteropServices.Marshal.ReleaseComObject(sheets);
            app.Workbooks.Close();
            app.Quit();
            System.Runtime.InteropServices.Marshal.ReleaseComObject(app);
            app = null;
            // Kept from the original: forces collection of any remaining COM wrappers.
            GC.Collect();
            GC.WaitForPendingFinalizers();
        }
    }

    /// <summary>
    /// Appends the displayed text of sheet rows firstRow..lastRow (inclusive,
    /// 1-based) to the table, one DataRow per sheet row; cells without a value
    /// become empty strings.
    /// </summary>
    private static void ReadRowsInto(Excel.Worksheet worksheet, System.Data.DataTable table, int firstRow, int lastRow, int columnCount)
    {
        for (int iRow = firstRow; iRow <= lastRow; iRow++)
        {
            DataRow dr = table.NewRow();
            for (int iCol = 1; iCol <= columnCount; iCol++)
            {
                Excel.Range range = (Excel.Range)worksheet.Cells[iRow, iCol];
                string cellContent = (range.Value2 == null) ? "" : range.Text.ToString();
                dr[iCol - 1] = cellContent;
            }
            table.Rows.Add(dr);
        }
    }

    原文还提供了第三种方法,感兴趣的可以关心一下:

    方法三:NPOI方式读取Excel,NPOI是一组开源的组件,类似Java的 POI。包括:NPOI、NPOI.HPSF、NPOI.HSSF、NPOI.HSSF.UserModel、NPOI.POIFS、NPOI.Util,下载的时候别只下一个噢

    优点:读取Excel速度较快,读取方式灵活

    缺点:只支持03的Excel,xlsx的无法读取。由于这点,使用这种方式的人不多啊,没理由要求客户使用03版Excel吧,再说03版Excel对于行数还有限制,只支持65536行。

    (听他们的开发人员说会在2012年底推出新版,支持xlsx的读取。但一直很忙没时间去关注这个事情,有兴趣的同学可以瞧瞧去)

    using System;
    using System.Data;
    using System.IO;
    using System.Web;
    using NPOI;
    using NPOI.HPSF;
    using NPOI.HSSF;
    using NPOI.HSSF.UserModel;
    using NPOI.POIFS;
    using NPOI.Util;
    using System.Text;
    using System.Configuration;
    
    /// <summary>
    /// Helpers that export DataTable / DataSet contents to .xls workbooks with
    /// NPOI's HSSF classes, either as an HTTP download or as a file built from
    /// a template workbook.
    /// </summary>
    public class NPOIHelper
    {
        // Used when the "ExcelMaxRow" app setting is missing or invalid:
        // an .xls sheet holds 65536 rows and one of them is the header row.
        private const int DefaultExcelMaxRow = 65535;

        // Maximum number of data rows written to a single sheet.
        private static readonly int ExcelMaxRow = ReadExcelMaxRow();

        /// <summary>
        /// Reads the "ExcelMaxRow" app setting. A missing, non-numeric or
        /// non-positive value falls back to the default; a value of 0 would
        /// otherwise make <see cref="GetdtGroup"/> loop forever.
        /// </summary>
        private static int ReadExcelMaxRow()
        {
            int configured;
            string raw = ConfigurationManager.AppSettings["ExcelMaxRow"];
            if (int.TryParse(raw, out configured) && configured > 0)
            {
                return configured;
            }
            return DefaultExcelMaxRow;
        }

        /// <summary>
        /// Adds a sheet named after the table to the workbook: row 0 holds the
        /// column names, the following rows hold each value's ToString() text.
        /// </summary>
        private static void WriteTableToSheet(HSSFWorkbook workbook, DataTable table)
        {
            HSSFSheet sheet = (HSSFSheet)workbook.CreateSheet(table.TableName);

            HSSFRow headerRow = (HSSFRow)sheet.CreateRow(0);
            foreach (DataColumn column in table.Columns)
            {
                headerRow.CreateCell(column.Ordinal).SetCellValue(column.ColumnName);
            }

            int rowIndex = 1;
            foreach (DataRow row in table.Rows)
            {
                HSSFRow dataRow = (HSSFRow)sheet.CreateRow(rowIndex);
                foreach (DataColumn column in table.Columns)
                {
                    dataRow.CreateCell(column.Ordinal).SetCellValue(row[column].ToString());
                }
                rowIndex++;
            }
        }

        /// <summary>
        /// Serializes the workbook into a new MemoryStream positioned at 0.
        /// </summary>
        private static MemoryStream WriteWorkbookToStream(HSSFWorkbook workbook)
        {
            MemoryStream ms = new MemoryStream();
            workbook.Write(ms);
            ms.Flush();
            ms.Position = 0;
            return ms;
        }

        /// <summary>
        /// Writes the stream's bytes to the current HTTP response as an
        /// attachment with the given file name and completes the request.
        /// </summary>
        private static void SendAsAttachment(MemoryStream workbookStream, string fileName)
        {
            HttpContext.Current.Response.AppendHeader("Content-Disposition", "attachment;filename=" + fileName);
            HttpContext.Current.Response.BinaryWrite(workbookStream.ToArray());
            HttpContext.Current.ApplicationInstance.CompleteRequest();
        }

        /// <summary>
        /// Builds a workbook with one sheet per table of the DataSet.
        /// </summary>
        /// <param name="sourceDs">DataSet whose tables are exported.</param>
        /// <returns>A stream, positioned at 0, containing the workbook.</returns>
        private static Stream ExportDataSetToExcel(DataSet sourceDs)
        {
            HSSFWorkbook workbook = new HSSFWorkbook();
            for (int i = 0; i < sourceDs.Tables.Count; i++)
            {
                WriteTableToSheet(workbook, sourceDs.Tables[i]);
            }
            return WriteWorkbookToStream(workbook);
        }

        /// <summary>
        /// Exports every table of the DataSet to one workbook and sends it as an
        /// attachment on the current HTTP response. Tables with more than
        /// ExcelMaxRow rows are replaced in <paramref name="sourceDs"/> by
        /// several smaller tables first, so the DataSet is modified.
        /// </summary>
        /// <param name="sourceDs">DataSet whose tables are exported.</param>
        /// <param name="fileName">File name sent in the Content-Disposition header.</param>
        public static void ExportDataSetToExcel(DataSet sourceDs, string fileName)
        {
            // Split every table that exceeds the per-sheet row limit.
            for (int t = 0; t < sourceDs.Tables.Count; t++)
            {
                if (sourceDs.Tables[t].Rows.Count > ExcelMaxRow)
                {
                    DataSet ds = GetdtGroup(sourceDs.Tables[t].Copy());
                    sourceDs.Tables.RemoveAt(t);
                    // Append the pieces; each has at most ExcelMaxRow rows.
                    for (int g = 0; g < ds.Tables.Count; g++)
                    {
                        DataTable dt = ds.Tables[g].Copy();
                        sourceDs.Tables.Add(dt);
                    }
                    // The table that followed the removed one now sits at index t.
                    t--;
                }
            }

            using (MemoryStream ms = (MemoryStream)ExportDataSetToExcel(sourceDs))
            {
                SendAsAttachment(ms, fileName);
            }
        }

        /// <summary>
        /// Builds a workbook with a single sheet holding the table.
        /// </summary>
        /// <param name="sourceTable">Table to export.</param>
        /// <returns>A stream, positioned at 0, containing the workbook.</returns>
        private static Stream ExportDataTableToExcel(DataTable sourceTable)
        {
            HSSFWorkbook workbook = new HSSFWorkbook();
            WriteTableToSheet(workbook, sourceTable);
            return WriteWorkbookToStream(workbook);
        }

        /// <summary>
        /// Exports the table and sends it as an attachment on the current HTTP
        /// response. A table with more than ExcelMaxRow rows is split across
        /// several sheets.
        /// </summary>
        /// <param name="sourceTable">Table to export.</param>
        /// <param name="fileName">File name sent in the Content-Disposition header.</param>
        public static void ExportDataTableToExcel(DataTable sourceTable, string fileName)
        {
            if (sourceTable.Rows.Count > ExcelMaxRow)
            {
                // Split a copy: GetdtGroup removes rows from the table it is
                // given and adds it to a new DataSet, which would truncate the
                // caller's table and throws if that table already belongs to a
                // DataSet.
                DataSet ds = GetdtGroup(sourceTable.Copy());
                ExportDataSetToExcel(ds, fileName);
            }
            else
            {
                using (MemoryStream ms = (MemoryStream)ExportDataTableToExcel(sourceTable))
                {
                    SendAsAttachment(ms, fileName);
                }
            }
        }

        /// <summary>
        /// Splits a table into pieces of at most ExcelMaxRow rows. The table
        /// passed in becomes the first piece (its surplus rows are removed);
        /// further pieces are named "&lt;TableName&gt;_1", "&lt;TableName&gt;_2", ...
        /// </summary>
        /// <param name="dt">
        /// Table to split. It is added to the returned DataSet, so it must not
        /// already belong to another DataSet.
        /// </param>
        /// <returns>A DataSet containing the pieces in row order.</returns>
        public static DataSet GetdtGroup(DataTable dt)
        {
            string tablename = dt.TableName;

            DataSet ds = new DataSet();
            ds.Tables.Add(dt);

            // Number of overflow tables needed: ceil(rows / max) - 1.
            int extraTables = dt.Rows.Count > 0 ? (dt.Rows.Count - 1) / ExcelMaxRow : 0;
            for (int i = 1; i <= extraTables; i++)
            {
                DataTable dtAdd = dt.Clone();
                dtAdd.TableName = tablename + "_" + i.ToString();
                ds.Tables.Add(dtAdd);
            }

            // Move the rows beyond the limit, in order, into the overflow
            // tables; each takes rows until it is full or the base table no
            // longer has a surplus.
            for (int i = 1; i < ds.Tables.Count; i++)
            {
                while (ds.Tables[i].Rows.Count < ExcelMaxRow && ds.Tables[0].Rows.Count > ExcelMaxRow)
                {
                    ds.Tables[i].Rows.Add(ds.Tables[0].Rows[ExcelMaxRow].ItemArray);
                    ds.Tables[0].Rows.RemoveAt(ExcelMaxRow);
                }
            }

            return ds;
        }

        /// <summary>
        /// Fills the sheet "Sheet1" of a template workbook with the table's rows
        /// and saves the result as a new .xls file in the same folder.
        /// </summary>
        /// <param name="sourceTable">Table whose rows are written.</param>
        /// <param name="modelpath">Folder prefix used for both the template and the output file.</param>
        /// <param name="modelName">Template file name without the ".xls" extension.</param>
        /// <param name="fileName">Output file name without the ".xls" extension.</param>
        /// <param name="sheetName">Not used by this method; the sheet name "Sheet1" is hard-coded.</param>
        public static void ExportDataTableToExcelModel(DataTable sourceTable, string modelpath, string modelName, string fileName, string sheetName)
        {
            // Load the template, then release the file before writing the output.
            HSSFWorkbook hssfworkbook;
            using (FileStream file = new FileStream(modelpath + modelName + ".xls", FileMode.Open, FileAccess.Read))
            {
                hssfworkbook = new HSSFWorkbook(file);
            }

            HSSFSheet sheet1 = (HSSFSheet)hssfworkbook.GetSheet("Sheet1");
            sheet1.GetRow(0).GetCell(0).SetCellValue("excelTitle"); // title cell

            // rowIndex is incremented before each use, so the first data row is
            // written at zero-based row index 3.
            int rowIndex = 2;
            foreach (DataRow row in sourceTable.Rows)
            {
                rowIndex++;
                int colIndex = 0;
                HSSFRow xlsrow = (HSSFRow)sheet1.CreateRow(rowIndex);
                foreach (DataColumn col in sourceTable.Columns)
                {
                    xlsrow.CreateCell(colIndex).SetCellValue(row[col.ColumnName].ToString());
                    colIndex++;
                }
            }
            sheet1.ForceFormulaRecalculation = true;

            using (FileStream fileS = new FileStream(modelpath + fileName + ".xls", FileMode.Create))
            {
                hssfworkbook.Write(fileS);
            }
        }
    }
  • 相关阅读:
    error: Microsoft Visual C++ 14.0 is required.
    pip安装其他包报错
    MapReduce
    机器学习算法使用
    结巴分词使用实例
    大数据——hbase
    机房收费系统系列一:运行时错误‘-2147217843(80040e4d)’;用户‘sa’登陆失败
    耿建玲视频总结
    学生信息管理系统系列三:验收时的改进
    学生信息管理系统系列二:常见问题
  • 原文地址:https://www.cnblogs.com/yellowcool/p/7448049.html
Copyright © 2011-2022 走看看