前言
之前公司有个需求,导入几十万的数据,需要从excel读取出来,再把重复的项合并起来导入数据库,当时用程序写的非常慢,光读取数据半小时都下不来,感觉自己写的程序太渣了.
思路
1.将Excel文件转换成.csv文件
2.读取.csv文件到DataTable里 (这个读取速度非常快)
3.补充数据表的列名,修改数据类型
4.使用SqlBulkCopy将DataTable中的数据批量插入数据库(这里就是瞬间插入的秘籍)
实现
下边直接上代码了 需要nuget安装 Microsoft.Office.Interop.Excel
using System; using System.Collections.Generic; using System.ComponentModel; using System.Data; using System.Drawing; using System.Linq; using System.Text; using System.Threading.Tasks; using System.Windows.Forms; using System.Diagnostics; using System.IO; using System.Reflection; using Excel=Microsoft.Office.Interop.Excel; using System.Data.SqlClient; namespace excel转csv { public partial class Form1 : Form { public Form1() { InitializeComponent(); } /// <summary> /// 将Csv文件转换为XLS文件 /// </summary> /// <param name="FilePath">文件全路路径</param> /// <returns>返回转换后的Xls文件名</returns> public static string CSVSaveasXLS(string FilePath) { QuertExcel(); string _NewFilePath = ""; Excel.Application excelApplication; Excel.Workbooks excelWorkBooks = null; Excel.Workbook excelWorkBook = null; Excel.Worksheet excelWorkSheet = null; try {
//此时报错:无法嵌入互操作类型“……”,请改用适用的接口的解决方法
//解决方案:选中项目中引入的dll,鼠标右键,选择属性,把“嵌入互操作类型”设置为False。
excelApplication = new Excel.ApplicationClass(); excelWorkBooks = excelApplication.Workbooks; excelWorkBook = ((Excel.Workbook)excelWorkBooks.Open(FilePath, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value)); excelWorkSheet = (Excel.Worksheet)excelWorkBook.Worksheets[1]; excelApplication.Visible = false; excelApplication.DisplayAlerts = false; _NewFilePath = FilePath.Replace(".csv", ".xls"); excelWorkBook.SaveAs(_NewFilePath, Excel.XlFileFormat.xlAddIn, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Excel.XlSaveAsAccessMode.xlNoChange, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value); excelWorkBook.Close(); QuertExcel(); // ExcelFormatHelper.DeleteFile(FilePath); //可以不用杀掉进程QuertExcel(); GC.Collect(System.GC.GetGeneration(excelWorkSheet)); GC.Collect(System.GC.GetGeneration(excelWorkBook)); GC.Collect(System.GC.GetGeneration(excelApplication)); } catch (Exception exc) { throw new Exception(exc.Message); } finally { GC.Collect(); } return _NewFilePath; } /// <summary> /// 将xls文件转换为csv文件 /// </summary> /// <param name="FilePath">文件全路路径</param> /// <returns>返回转换后的csv文件名</returns> public static string XLSSavesaCSV(string FilePath) { QuertExcel(); string _NewFilePath = ""; Excel.Application excelApplication; Excel.Workbooks excelWorkBooks = null; Excel.Workbook excelWorkBook = null; Excel.Worksheet excelWorkSheet = null; try { excelApplication = new Excel.ApplicationClass(); excelWorkBooks = excelApplication.Workbooks; excelWorkBook = ((Excel.Workbook)excelWorkBooks.Open(FilePath, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value)); excelWorkSheet = (Excel.Worksheet)excelWorkBook.Worksheets[1]; excelApplication.Visible = false; excelApplication.DisplayAlerts = false; _NewFilePath = FilePath.Replace(".xlsx", ".csv"); //excelWorkSheet._SaveAs(FilePath, Excel.XlFileFormat.xlCSVWindows, Missing.Value, Missing.Value, Missing.Value,Missing.Value,Missing.Value, Missing.Value, Missing.Value); excelWorkBook.SaveAs(_NewFilePath, Excel.XlFileFormat.xlCSV, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Microsoft.Office.Interop.Excel.XlSaveAsAccessMode.xlNoChange, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value); QuertExcel(); //ExcelFormatHelper.DeleteFile(FilePath); } catch (Exception exc) { throw new Exception(exc.Message); } return _NewFilePath; } /// <summary> /// 删除一个指定的文件 /// </summary> /// <param name="FilePath">文件路径</param> /// <returns></returns> public static bool DeleteFile(string FilePath) { try { bool IsFind = File.Exists(FilePath); if (IsFind) { File.Delete(FilePath); } else { throw new IOException("指定的文件不存在"); } return true; } catch (Exception exc) { throw new Exception(exc.Message); } } /// <summary> /// 执行过程中可能会打开多个EXCEL文件 所以杀掉 /// </summary> private static void QuertExcel() { Process[] excels = Process.GetProcessesByName("EXCEL"); foreach (var item in excels) { item.Kill(); } } private void Button1_Click(object sender, EventArgs e) { //F:资料资料demo大数据瞬间入库WebApplication1Content客户信息.csv //CSVSaveasXLS(textBox1.Text); //F:资料资料demo大数据瞬间入库WebApplication1Content客户信息.xlsx //E:客户信息模板.xlsx var aa = XLSSavesaCSV(textBox1.Text); label1.Text = aa; //让主线程停顿两秒 负责下边读取csv文件太快会显示占用 System.Threading.Thread.Sleep(2000); //2秒 var dt= OpenCSV(aa); dt.Columns["序号"].ColumnName = "Id"; //有数据则不能改类型 //dt.Columns["Id"].DataType = Type.GetType("System.Int32"); dt.Columns["公司名称"].ColumnName = "Company"; dt.Columns["公司注册地址"].ColumnName = "Address"; dt.Columns["公司营业范围"].ColumnName = "Business"; dt.Columns["注册资金"].ColumnName = "RegCapital"; dt.Columns["投保人数"].ColumnName = "EmployeesNum"; dt.Columns["法人姓名"].ColumnName = "Boss"; dt.Columns["备注"].ColumnName = "Remark"; dt.Columns.Add("Status", Type.GetType("System.Int32")).SetOrdinal(7); dt.Columns.Add("StatusTime", Type.GetType("System.DateTime")).SetOrdinal(8); dt.Columns.Add("Type", Type.GetType("System.Int32")).SetOrdinal(9); dt.Columns.Add("TypeTime", Type.GetType("System.DateTime")).SetOrdinal(10); dt.Columns.Add("Level", Type.GetType("System.String")).SetOrdinal(11); dt.Columns.Add("PrePurchase", Type.GetType("System.Decimal")).SetOrdinal(12); dt.Columns.Add("Description", Type.GetType("System.String")).SetOrdinal(13); dt.Columns.Add("YewuId", Type.GetType("System.Int32")).SetOrdinal(14); dt.Columns.Add("YewuName", Type.GetType("System.String")).SetOrdinal(15); dt.Columns.Add("IsDel", Type.GetType("System.Int32")).SetOrdinal(16); dt.Columns.Add("Createtime",Type.GetType("System.DateTime")).SetOrdinal(18); //有数据的列要修改类型进入以下方法 dt = UpdateDataTable(dt); var newCon = "server=127.0.0.1;database=its;uid=sa;pwd=1;"; SqlBulkCopyInsert(newCon, "MallCustomer", dt); } /// <summary> /// 修改数据表DataTable某一列的类型和记录值(正确步骤:1.克隆表结构,2.修改列类型,3.修改记录值,4.返回希望的结果) /// </summary> /// <param name="argDataTable">数据表DataTable</param> /// <returns>数据表DataTable</returns> private DataTable UpdateDataTable(DataTable argDataTable) { DataTable dtResult = new DataTable(); //克隆表结构 dtResult = argDataTable.Clone(); foreach (DataColumn col in dtResult.Columns) { //修改列的数据格式 switch (col.ColumnName) { case "Id": col.DataType = typeof(Int32); break; case "RegCapital": col.DataType = typeof(Decimal); break; case "EmployeesNum": col.DataType = typeof(Int32); break; } } string company = ""; foreach (DataRow row in argDataTable.Rows) { DataRow rowNew = dtResult.NewRow(); rowNew["Id"] =Convert.ToInt32(row["Id"].ToString() == "" ? 0 : row["Id"]).ToString(); var aaa = row["EmployeesNum"]; //修改记录值 rowNew["EmployeesNum"] = Convert.ToInt32(row["EmployeesNum"].ToString()==""?0: row["EmployeesNum"]).ToString(); var aaa1 = row["RegCapital"]; rowNew["RegCapital"] = Convert.ToDecimal(row["RegCapital"].ToString() == "" ? 0 : row["RegCapital"]).ToString(); rowNew["Company"] = row["Company"]; rowNew["Address"] = row["Address"]; rowNew["Business"] = row["Business"]; rowNew["Boss"] = row["Boss"]; rowNew["Remark"] = row["Remark"]; rowNew["Status"] = row["Status"]; rowNew["StatusTime"] = row["StatusTime"]; rowNew["Type"] = row["Type"]; rowNew["TypeTime"] = row["TypeTime"]; rowNew["Level"] = row["Level"]; rowNew["PrePurchase"] = row["PrePurchase"]; rowNew["Description"] = row["Description"]; rowNew["YewuId"] = row["YewuId"]; rowNew["YewuName"] = row["YewuName"]; rowNew["IsDel"] = 0.ToString(); rowNew["Createtime"] = DateTime.Now.ToString(); //合并重复项 if (company == row["Company"].ToString()) { dtResult.Rows[dtResult.Rows.Count - 1]["Remark"] +=","+ row["Remark"].ToString(); } else { dtResult.Rows.Add(rowNew); company = row["Company"].ToString(); } } return dtResult; } /// <summary> /// 将CSV文件的数据读取到DataTable中 /// </summary> /// <param name="fileName">CSV文件路径</param> /// <returns>返回读取了CSV数据的DataTable</returns> public static DataTable OpenCSV(string filePath) { //中文乱码 如果乱码 可以改下编码格式 Encoding encoding = Encoding.Default; //Encoding.ASCII;// DataTable dt = new DataTable(); FileStream fs = new FileStream(filePath, System.IO.FileMode.Open, System.IO.FileAccess.Read); //StreamReader sr = new StreamReader(fs, Encoding.UTF8); StreamReader sr = new StreamReader(fs, encoding); //string fileContent = sr.ReadToEnd(); //encoding = sr.CurrentEncoding; //记录每次读取的一行记录 string strLine = ""; //记录每行记录中的各字段内容 string[] aryLine = null; string[] tableHead = null; //标示列数 int columnCount = 0; //标示是否是读取的第一行 bool IsFirst = true; //逐行读取CSV中的数据 while ((strLine = sr.ReadLine()) != null) { //strLine = Common.ConvertStringUTF8(strLine, encoding); //strLine = Common.ConvertStringUTF8(strLine); if (IsFirst == true) { tableHead = strLine.Split(','); IsFirst = false; columnCount = tableHead.Length; //创建列 for (int i = 0; i < columnCount; i++) { DataColumn dc = new DataColumn(tableHead[i]); dt.Columns.Add(dc); } } else { if (!String.IsNullOrEmpty(strLine)) { aryLine = strLine.Split(','); DataRow dr = dt.NewRow(); for (int j = 0; j < columnCount; j++) { dr[j] = aryLine[j]; } dt.Rows.Add(dr); } } } if (aryLine != null && aryLine.Length > 0) { dt.DefaultView.Sort = tableHead[0] + " " + "asc"; } sr.Close(); fs.Close(); return dt; } #region 使用SqlBulkCopy将DataTable中的数据批量插入数据库中 /// <summary> /// 注意:DataTable中的列需要与数据库表中的列完全一致。/// </summary> /// <param name="conStr">数据库连接串</param> /// <param name="strTableName">数据库中对应的表名</param> /// <param name="dtData">数据集</param> public static void SqlBulkCopyInsert(string conStr, string strTableName, DataTable dtData) { try { using (SqlBulkCopy sqlRevdBulkCopy = new SqlBulkCopy(conStr)) //引用SqlBulkCopy { sqlRevdBulkCopy.DestinationTableName = strTableName; //数据库中对应的表名 sqlRevdBulkCopy.NotifyAfter = dtData.Rows.Count; //有几行数据 sqlRevdBulkCopy.WriteToServer(dtData); //数据导入数据库 sqlRevdBulkCopy.Close(); //关闭连接 } } catch (Exception ex) {
//这里可能会报一些数据类型的错误,有以下几种解决办法
//1.DataTable的数据类型要与数据库一致,列的顺序,列名大小写都要一致
//2.数据太长,如数据库类型是varchar(50),当前数据太长超过50就会报错,可以修改数据库类型
throw (ex); } } #endregion } }
这样百万级数据就能瞬间插入数据库了