zoukankan      html  css  js  c++  java
  • Excel导入数据库百万级数据瞬间插入

    前言

       之前公司有个需求,导入几十万的数据,需要从excel读取出来,再把重复的项合并起来导入数据库,当时用程序写的非常慢,光读取数据半小时都下不来,感觉自己写的程序太渣了.

    思路

      1.将Excel文件转换成.csv文件

     2.读取.csv文件到DataTable里 (这个读取速度非常快)

       3.补充数据表的列名,修改数据类型

     4.使用SqlBulkCopy将DataTable中的数据批量插入数据库(这里就是瞬间插入的秘籍)

    实现

      下边直接上代码了 需要nuget安装  Microsoft.Office.Interop.Excel

    using System;
    using System.Collections.Generic;
    using System.ComponentModel;
    using System.Data;
    using System.Drawing;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    using System.Windows.Forms;
    using System.Diagnostics;
    using System.IO;
    using System.Reflection;
    using Excel=Microsoft.Office.Interop.Excel;
    using System.Data.SqlClient;
    
    namespace excel转csv
    {
        public partial class Form1 : Form
        {
            public Form1()
            {
                InitializeComponent();
            }
    
            /// <summary>
            /// 将Csv文件转换为XLS文件
            /// </summary>
            /// <param name="FilePath">文件全路路径</param>
            /// <returns>返回转换后的Xls文件名</returns>
            public static string CSVSaveasXLS(string FilePath)
            {
                QuertExcel();
                string _NewFilePath = "";
    
                Excel.Application excelApplication;
                Excel.Workbooks excelWorkBooks = null;
                Excel.Workbook excelWorkBook = null;
                Excel.Worksheet excelWorkSheet = null;
    
                try
                {
    //此时报错
    :无法嵌入互操作类型“……”,请改用适用的接口的解决方法

             //解决方案:选中项目中引入的dll,鼠标右键,选择属性,把“嵌入互操作类型”设置为False。
                    excelApplication = new Excel.ApplicationClass();
                    excelWorkBooks = excelApplication.Workbooks;
                    excelWorkBook = ((Excel.Workbook)excelWorkBooks.Open(FilePath, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value));
                    excelWorkSheet = (Excel.Worksheet)excelWorkBook.Worksheets[1];
                    excelApplication.Visible = false;
                    excelApplication.DisplayAlerts = false;
                    _NewFilePath = FilePath.Replace(".csv", ".xls");
                    excelWorkBook.SaveAs(_NewFilePath, Excel.XlFileFormat.xlAddIn, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Excel.XlSaveAsAccessMode.xlNoChange, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value);
                    excelWorkBook.Close();
                    QuertExcel();
                    // ExcelFormatHelper.DeleteFile(FilePath);
                    //可以不用杀掉进程QuertExcel();
    
    
                    GC.Collect(System.GC.GetGeneration(excelWorkSheet));
                    GC.Collect(System.GC.GetGeneration(excelWorkBook));
                    GC.Collect(System.GC.GetGeneration(excelApplication));
    
                }
                catch (Exception exc)
                {
                    throw new Exception(exc.Message);
                }
    
                finally
                {
                    GC.Collect();
                }
    
                return _NewFilePath;
            }
    
    
    
            /// <summary>
            /// 将xls文件转换为csv文件
            /// </summary>
            /// <param name="FilePath">文件全路路径</param>
            /// <returns>返回转换后的csv文件名</returns>
            public static string XLSSavesaCSV(string FilePath)
            {
                QuertExcel();
                string _NewFilePath = "";
    
                Excel.Application excelApplication;
                Excel.Workbooks excelWorkBooks = null;
                Excel.Workbook excelWorkBook = null;
                Excel.Worksheet excelWorkSheet = null;
    
                try
                {
                    excelApplication = new Excel.ApplicationClass();
                    excelWorkBooks = excelApplication.Workbooks;
                    excelWorkBook = ((Excel.Workbook)excelWorkBooks.Open(FilePath, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value));
                    excelWorkSheet = (Excel.Worksheet)excelWorkBook.Worksheets[1];
                    excelApplication.Visible = false;
                    excelApplication.DisplayAlerts = false;
                    _NewFilePath = FilePath.Replace(".xlsx", ".csv");
    
                    //excelWorkSheet._SaveAs(FilePath, Excel.XlFileFormat.xlCSVWindows, Missing.Value, Missing.Value, Missing.Value,Missing.Value,Missing.Value, Missing.Value, Missing.Value);
                    excelWorkBook.SaveAs(_NewFilePath, Excel.XlFileFormat.xlCSV, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Microsoft.Office.Interop.Excel.XlSaveAsAccessMode.xlNoChange, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value);
                    QuertExcel();
                    //ExcelFormatHelper.DeleteFile(FilePath);
                }
                catch (Exception exc)
                {
                    throw new Exception(exc.Message);
                }
                return _NewFilePath;
            }
    
    
    
            /// <summary>
            /// 删除一个指定的文件
            /// </summary>
            /// <param name="FilePath">文件路径</param>
            /// <returns></returns>
            public static bool DeleteFile(string FilePath)
            {
                try
                {
                    bool IsFind = File.Exists(FilePath);
                    if (IsFind)
                    {
                        File.Delete(FilePath);
                    }
                    else
                    {
                        throw new IOException("指定的文件不存在");
                    }
                    return true;
                }
                catch (Exception exc)
                {
                    throw new Exception(exc.Message);
                }
    
            }
    
    
            /// <summary>
            /// 执行过程中可能会打开多个EXCEL文件 所以杀掉
            /// </summary>
            private static void QuertExcel()
            {
                Process[] excels = Process.GetProcessesByName("EXCEL");
                foreach (var item in excels)
                {
                    item.Kill();
                }
            }
    
    
            private void Button1_Click(object sender, EventArgs e)
            {
                //F:资料资料demo大数据瞬间入库WebApplication1Content客户信息.csv
                //CSVSaveasXLS(textBox1.Text);
                //F:资料资料demo大数据瞬间入库WebApplication1Content客户信息.xlsx
                //E:客户信息模板.xlsx
                var aa =  XLSSavesaCSV(textBox1.Text);
                label1.Text = aa;
    
                //让主线程停顿两秒 负责下边读取csv文件太快会显示占用 
                System.Threading.Thread.Sleep(2000);  //2秒
    
                var dt= OpenCSV(aa);
                dt.Columns["序号"].ColumnName = "Id";
                //有数据则不能改类型
                //dt.Columns["Id"].DataType = Type.GetType("System.Int32");
                dt.Columns["公司名称"].ColumnName = "Company";
                dt.Columns["公司注册地址"].ColumnName = "Address";
                dt.Columns["公司营业范围"].ColumnName = "Business";
                dt.Columns["注册资金"].ColumnName = "RegCapital";
                dt.Columns["投保人数"].ColumnName = "EmployeesNum";
                dt.Columns["法人姓名"].ColumnName = "Boss";
                dt.Columns["备注"].ColumnName = "Remark";
                dt.Columns.Add("Status", Type.GetType("System.Int32")).SetOrdinal(7);
                dt.Columns.Add("StatusTime", Type.GetType("System.DateTime")).SetOrdinal(8);
                dt.Columns.Add("Type", Type.GetType("System.Int32")).SetOrdinal(9);
                dt.Columns.Add("TypeTime", Type.GetType("System.DateTime")).SetOrdinal(10);
                dt.Columns.Add("Level", Type.GetType("System.String")).SetOrdinal(11);
                dt.Columns.Add("PrePurchase", Type.GetType("System.Decimal")).SetOrdinal(12);
                dt.Columns.Add("Description", Type.GetType("System.String")).SetOrdinal(13);
                dt.Columns.Add("YewuId", Type.GetType("System.Int32")).SetOrdinal(14);
                dt.Columns.Add("YewuName", Type.GetType("System.String")).SetOrdinal(15);
                dt.Columns.Add("IsDel", Type.GetType("System.Int32")).SetOrdinal(16);
                dt.Columns.Add("Createtime",Type.GetType("System.DateTime")).SetOrdinal(18);
    
                //有数据的列要修改类型进入以下方法
                dt = UpdateDataTable(dt);
    
                var newCon = "server=127.0.0.1;database=its;uid=sa;pwd=1;";
    
                SqlBulkCopyInsert(newCon, "MallCustomer", dt);
            }
    
            /// <summary>
            /// 修改数据表DataTable某一列的类型和记录值(正确步骤:1.克隆表结构,2.修改列类型,3.修改记录值,4.返回希望的结果)
            /// </summary>
            /// <param name="argDataTable">数据表DataTable</param>
            /// <returns>数据表DataTable</returns>  
            private DataTable UpdateDataTable(DataTable argDataTable)
            {
                DataTable dtResult = new DataTable();
                //克隆表结构
                dtResult = argDataTable.Clone();
                foreach (DataColumn col in dtResult.Columns)
                {
                    //修改列的数据格式  
                    switch (col.ColumnName)
                    {
                        case "Id":
                            col.DataType = typeof(Int32);
                            break;
                        case "RegCapital":
                            col.DataType = typeof(Decimal);
                            break;
                        case "EmployeesNum":
                            col.DataType = typeof(Int32);
                            break;
                    }
                }
                string company = "";
                foreach (DataRow row in argDataTable.Rows)
                {
                    DataRow rowNew = dtResult.NewRow();
                    rowNew["Id"] =Convert.ToInt32(row["Id"].ToString() == "" ? 0 : row["Id"]).ToString();
                    var aaa = row["EmployeesNum"];
                    //修改记录值
                    rowNew["EmployeesNum"] = Convert.ToInt32(row["EmployeesNum"].ToString()==""?0: row["EmployeesNum"]).ToString();
                    var aaa1 = row["RegCapital"];
                    rowNew["RegCapital"] = Convert.ToDecimal(row["RegCapital"].ToString() == "" ? 0 : row["RegCapital"]).ToString();
                    rowNew["Company"] = row["Company"];
                    rowNew["Address"] = row["Address"];
                    rowNew["Business"] = row["Business"];
                    rowNew["Boss"] = row["Boss"];
                    rowNew["Remark"] = row["Remark"];
                    rowNew["Status"] = row["Status"];
                    rowNew["StatusTime"] = row["StatusTime"];
                    rowNew["Type"] = row["Type"];
                    rowNew["TypeTime"] = row["TypeTime"];
                    rowNew["Level"] = row["Level"];
                    rowNew["PrePurchase"] = row["PrePurchase"];
                    rowNew["Description"] = row["Description"];
                    rowNew["YewuId"] = row["YewuId"];
                    rowNew["YewuName"] = row["YewuName"];
                    rowNew["IsDel"] = 0.ToString();
                    rowNew["Createtime"] = DateTime.Now.ToString();
    
                    //合并重复项
                    if (company == row["Company"].ToString())
                    {
                        dtResult.Rows[dtResult.Rows.Count - 1]["Remark"] +=","+ row["Remark"].ToString();
                    }
                    else
                    {
                        dtResult.Rows.Add(rowNew);
                        company = row["Company"].ToString();
                    }
                }
                return dtResult;
            }
    
    
            /// <summary>
            /// 将CSV文件的数据读取到DataTable中
            /// </summary>
            /// <param name="fileName">CSV文件路径</param>
            /// <returns>返回读取了CSV数据的DataTable</returns>
            public static DataTable OpenCSV(string filePath)
            {
                //中文乱码 如果乱码 可以改下编码格式
                Encoding encoding = Encoding.Default; //Encoding.ASCII;//
                DataTable dt = new DataTable();
                FileStream fs = new FileStream(filePath, System.IO.FileMode.Open, System.IO.FileAccess.Read);
    
                //StreamReader sr = new StreamReader(fs, Encoding.UTF8);
                StreamReader sr = new StreamReader(fs, encoding);
                //string fileContent = sr.ReadToEnd();
                //encoding = sr.CurrentEncoding;
                //记录每次读取的一行记录
                string strLine = "";
                //记录每行记录中的各字段内容
                string[] aryLine = null;
                string[] tableHead = null;
                //标示列数
                int columnCount = 0;
                //标示是否是读取的第一行
                bool IsFirst = true;
                //逐行读取CSV中的数据
                while ((strLine = sr.ReadLine()) != null)
                {
                    //strLine = Common.ConvertStringUTF8(strLine, encoding);
                    //strLine = Common.ConvertStringUTF8(strLine);
    
                    if (IsFirst == true)
                    {
                        tableHead = strLine.Split(',');
                        IsFirst = false;
                        columnCount = tableHead.Length;
                        //创建列
                        for (int i = 0; i < columnCount; i++)
                        {
                            DataColumn dc = new DataColumn(tableHead[i]);
                            dt.Columns.Add(dc);
                        }
                    }
                    else
                    {
                        if (!String.IsNullOrEmpty(strLine))
                        {
                            aryLine = strLine.Split(',');
                            DataRow dr = dt.NewRow();
                            for (int j = 0; j < columnCount; j++)
                            {
                                dr[j] = aryLine[j];
                            }
                            dt.Rows.Add(dr);
                        }
                    }
                }
                if (aryLine != null && aryLine.Length > 0)
                {
                    dt.DefaultView.Sort = tableHead[0] + " " + "asc";
                }
    
                sr.Close();
                fs.Close();
                return dt;
            }
    
    
            #region 使用SqlBulkCopy将DataTable中的数据批量插入数据库中
            /// <summary>  
            /// 注意:DataTable中的列需要与数据库表中的列完全一致。/// </summary>  
            /// <param name="conStr">数据库连接串</param>
            /// <param name="strTableName">数据库中对应的表名</param>  
            /// <param name="dtData">数据集</param>  
            public static void SqlBulkCopyInsert(string conStr, string strTableName, DataTable dtData)
            {
                try
                {
                    using (SqlBulkCopy sqlRevdBulkCopy = new SqlBulkCopy(conStr))           //引用SqlBulkCopy  
                    {
                        sqlRevdBulkCopy.DestinationTableName = strTableName;                //数据库中对应的表名  
                        sqlRevdBulkCopy.NotifyAfter = dtData.Rows.Count;                    //有几行数据  
                        sqlRevdBulkCopy.WriteToServer(dtData);                              //数据导入数据库  
                        sqlRevdBulkCopy.Close();                                            //关闭连接  
                    }
                }
                catch (Exception ex)
                {
              //这里可能会报一些数据类型的错误,有以下几种解决办法
              //1.DataTable的数据类型要与数据库一致,列的顺序,列名大小写都要一致
    //2.数据太长,如数据库类型是varchar(50),当前数据太长超过50就会报错,可以修改数据库类型
    throw (ex); } } #endregion } }


    这样百万级数据就能瞬间插入数据库了 

  • 相关阅读:
    我来了
    性能分析:处理器、磁盘I/O、进程、网络分析方法 http://www.cnblogs.com/fnng/archive/2012/10/30/2747246.html
    jvisualvm监控服务器状态
    linux下常用监控命令
    app 常见网络性能
    native app ->hybrid app->web app的发展
    JMeter远程启动客户端总是不通的原因
    java机制
    webbench,linux下并发测试工具
    操作数数据类型 ntext 对于 max 运算符无效
  • 原文地址:https://www.cnblogs.com/LiChen19951127/p/10950197.html
Copyright © 2011-2022 走看看