zoukankan      html  css  js  c++  java
  • C# 读取 csv(大文件)

    上次读取了excel发现还是很慢(结果集为DataTable),后来研究了一下csv这个文件效率很高呀,特别是针对大文件的时候,话不多说上代码!

    本机配置:win10 i5900F 16G 500G固态

    1、csv文件帮助类

      /// <summary>
      /// Minimal CSV helper that moves data between a <see cref="DataTable"/> and a
      /// comma-separated text file.
      /// </summary>
      /// <remarks>
      /// Both directions use <c>System.Text.Encoding.Default</c>, so a file written by
      /// <see cref="SaveCSV"/> can be read back by <c>ReadCSV</c> on the same machine.
      /// Fields are quoted following RFC 4180: a field that starts with a double quote is
      /// treated as quoted, <c>""</c> inside it stands for one literal quote, and it may
      /// contain commas and line breaks.
      /// </remarks>
      public static class CsvHelper
      {
          /// <summary>Characters that force a field to be written inside double quotes.</summary>
          private static readonly char[] QuoteTriggers = { ',', '"', '\r', '\n' };

          /// <summary>
          /// Creates (or overwrites) a CSV file: one header line with the column names,
          /// followed by one record per row of <paramref name="dt"/>.
          /// </summary>
          /// <param name="dt">Table to export. Each cell is written using its <c>ToString()</c> value.</param>
          /// <param name="fullFileName">Full path of the file to create.</param>
          /// <returns>Always <c>true</c>; failures are reported by exception.</returns>
          /// <exception cref="ArgumentNullException"><paramref name="dt"/> is null.</exception>
          public static Boolean SaveCSV(DataTable dt, string fullFileName)
          {
              // Validate before opening the file so a bad argument does not truncate an existing file.
              if (dt == null)
              {
                  throw new ArgumentNullException("dt");
              }

              // using-blocks guarantee the handles are released even when a write fails.
              using (FileStream fs = new FileStream(fullFileName, System.IO.FileMode.Create, System.IO.FileAccess.Write))
              using (StreamWriter sw = new StreamWriter(fs, System.Text.Encoding.Default))
              {
                  int columnCount = dt.Columns.Count;
                  string[] cells = new string[columnCount];

                  // Header record.
                  for (int i = 0; i < columnCount; i++)
                  {
                      cells[i] = dt.Columns[i].ColumnName;
                  }
                  WriteRecord(sw, cells);

                  // Data records.
                  for (int i = 0; i < dt.Rows.Count; i++)
                  {
                      DataRow row = dt.Rows[i];
                      for (int j = 0; j < columnCount; j++)
                      {
                          cells[j] = row[j].ToString();
                      }
                      WriteRecord(sw, cells);
                  }
              }

              return true;
          }

          /// <summary>
          /// Reads a whole CSV file, treating the first record as the header row.
          /// </summary>
          /// <param name="fullFileName">Full path of the file to read.</param>
          /// <returns>A table with one string column per header field.</returns>
          public static DataTable ReadCSV(string fullFileName)
          {
              return ReadCSV(fullFileName, 0, 0, 0, 0, true);
          }

          /// <summary>
          /// Reads a rectangular window of a CSV file into a <see cref="DataTable"/>.
          /// </summary>
          /// <param name="fullFileName">Full path of the file to read.</param>
          /// <param name="firstRow">1-based physical line to start at; values of 1 or less start at the first line.</param>
          /// <param name="firstColumn">0-based index of the first field to keep.</param>
          /// <param name="getRows">Maximum number of data rows to return; 0 or less means all rows.</param>
          /// <param name="getColumns">Number of fields to keep starting at <paramref name="firstColumn"/>; 0 means through the last field of the first record.</param>
          /// <param name="haveTitleRow">True when the first record read holds the column names; otherwise columns are named <c>COL&lt;index&gt;</c> and the first record is data.</param>
          /// <returns>
          /// The populated table. Blank lines are skipped, empty fields are kept as empty
          /// strings, and a record with fewer fields than the header leaves the missing
          /// cells as <c>DBNull</c>.
          /// </returns>
          /// <exception cref="ArgumentOutOfRangeException"><paramref name="firstColumn"/> or <paramref name="getColumns"/> is negative.</exception>
          /// <remarks>
          /// I/O errors and table errors (for example duplicate header names) propagate to
          /// the caller instead of being swallowed, so a partial result is never mistaken
          /// for a complete one. Line breaks inside quoted fields are normalized to <c>\n</c>.
          /// </remarks>
          public static DataTable ReadCSV(string fullFileName, Int16 firstRow = 0, Int16 firstColumn = 0, Int16 getRows = 0, Int16 getColumns = 0, bool haveTitleRow = true)
          {
              if (firstColumn < 0)
              {
                  throw new ArgumentOutOfRangeException("firstColumn");
              }
              if (getColumns < 0)
              {
                  throw new ArgumentOutOfRangeException("getColumns");
              }

              DataTable dt = new DataTable();

              using (FileStream fs = new FileStream(fullFileName, System.IO.FileMode.Open, System.IO.FileAccess.Read))
              using (StreamReader sr = new StreamReader(fs, System.Text.Encoding.Default))
              {
                  // Skip the physical lines that precede the requested start line.
                  for (int i = 1; i < firstRow; i++)
                  {
                      if (sr.ReadLine() == null)
                      {
                          break;
                      }
                  }

                  System.Collections.Generic.List<string> fields = new System.Collections.Generic.List<string>();
                  bool columnsCreated = false;
                  int lastColumn = 0; // exclusive upper bound of the kept field range
                  int rowsRead = 0;
                  string line;

                  while ((line = sr.ReadLine()) != null)
                  {
                      // A blank line carries no record; skipping it avoids creating bogus rows.
                      if (line.Trim().Length == 0)
                      {
                          continue;
                      }

                      // A quoted field may span several physical lines: keep appending
                      // lines until the quote is closed or the file ends.
                      string record = line;
                      bool complete = ParseRecord(record.Trim(), fields);
                      while (!complete)
                      {
                          string continuation = sr.ReadLine();
                          if (continuation == null)
                          {
                              break;
                          }
                          record = record + "\n" + continuation;
                          complete = ParseRecord(record.Trim(), fields);
                      }

                      if (!columnsCreated)
                      {
                          columnsCreated = true;

                          // The first record fixes the column layout; never reach past its last field.
                          lastColumn = getColumns == 0
                              ? fields.Count
                              : Math.Min(firstColumn + getColumns, fields.Count);

                          for (int i = firstColumn; i < lastColumn; i++)
                          {
                              dt.Columns.Add(new DataColumn(haveTitleRow ? fields[i] : "COL" + i.ToString()));
                          }

                          if (haveTitleRow)
                          {
                              continue;
                          }
                      }

                      DataRow dr = dt.NewRow();
                      int available = Math.Min(lastColumn, fields.Count);
                      for (int j = firstColumn; j < available; j++)
                      {
                          dr[j - firstColumn] = fields[j];
                      }
                      dt.Rows.Add(dr);

                      rowsRead++;
                      if (getRows > 0 && rowsRead >= getRows)
                      {
                          break;
                      }
                  }
              }

              return dt;
          }

          /// <summary>Writes one record: the cells joined by commas, each escaped as needed, then a line break.</summary>
          private static void WriteRecord(System.IO.TextWriter writer, string[] cells)
          {
              // A lone empty cell would produce a blank line, which readers skip; quote it instead.
              bool quoteIfEmpty = cells.Length == 1;

              for (int i = 0; i < cells.Length; i++)
              {
                  if (i > 0)
                  {
                      writer.Write(',');
                  }
                  writer.Write(EscapeField(cells[i], quoteIfEmpty));
              }
              writer.WriteLine();
          }

          /// <summary>Returns the field text as it must appear in the file, quoted only when required.</summary>
          private static string EscapeField(string value, bool quoteIfEmpty)
          {
              if (string.IsNullOrEmpty(value))
              {
                  return quoteIfEmpty ? "\"\"" : string.Empty;
              }

              // Edge whitespace is quoted too, because the reader trims each record.
              bool needsQuotes = value.IndexOfAny(QuoteTriggers) >= 0
                  || char.IsWhiteSpace(value[0])
                  || char.IsWhiteSpace(value[value.Length - 1]);

              if (!needsQuotes)
              {
                  return value;
              }

              return "\"" + value.Replace("\"", "\"\"") + "\"";
          }

          /// <summary>
          /// Splits one record into fields, honouring quoted fields. Returns false when
          /// the record ends inside an unterminated quoted field; <paramref name="fields"/>
          /// is filled with what was parsed either way.
          /// </summary>
          private static bool ParseRecord(string record, System.Collections.Generic.List<string> fields)
          {
              fields.Clear();

              System.Text.StringBuilder current = new System.Text.StringBuilder();
              bool inQuotes = false;
              bool atFieldStart = true;

              for (int i = 0; i < record.Length; i++)
              {
                  char c = record[i];

                  if (inQuotes)
                  {
                      if (c != '"')
                      {
                          current.Append(c);
                      }
                      else if (i + 1 < record.Length && record[i + 1] == '"')
                      {
                          // Doubled quote inside a quoted field is one literal quote.
                          current.Append('"');
                          i++;
                      }
                      else
                      {
                          inQuotes = false;
                      }
                  }
                  else if (c == ',')
                  {
                      fields.Add(current.ToString());
                      current.Length = 0;
                      atFieldStart = true;
                  }
                  else if (c == '"' && atFieldStart)
                  {
                      // Only a quote at the very start of a field opens quoting;
                      // quotes elsewhere are kept as ordinary characters.
                      inQuotes = true;
                      atFieldStart = false;
                  }
                  else
                  {
                      current.Append(c);
                      atFieldStart = false;
                  }
              }

              fields.Add(current.ToString());
              return !inQuotes;
          }
      }
    View Code

    2、使用

    2.1、创建csv文件,数据量为100W行,21列

     /// <summary>
     /// Benchmark/demo: builds a table of 1,000,000 rows by 21 columns (an auto-increment
     /// ID plus string columns p1..p20), writes it with <c>CsvHelper.SaveCSV</c>, and logs
     /// the resulting file size and the time spent writing.
     /// </summary>
     public void CSV_Create()
     {
         // NOTE(review): this path looks like it lost its backslashes (probably
         // C:\Users\Administrator\Desktop\...) - confirm. It is kept unchanged here so it
         // stays identical to the path used by CSV_Read.
         string filePath = @"C:UsersAdministratorDesktop大数据.csv";

         #region Fill DataTable
         DataTable tblDatas = new DataTable("Datas");

         DataColumn idColumn = tblDatas.Columns.Add("ID", typeof(int));
         idColumn.AutoIncrement = true;   // value is assigned automatically
         idColumn.AutoIncrementSeed = 1;  // first ID is 1
         idColumn.AutoIncrementStep = 1;  // each new row adds 1
         idColumn.AllowDBNull = false;

         const int payloadColumnCount = 20;
         for (int c = 1; c <= payloadColumnCount; c++)
         {
             tblDatas.Columns.Add("p" + c, typeof(string));
         }

         // Rows are created only inside the loop: every NewRow() call consumes an
         // auto-increment value, so a stray NewRow() before the loop would make the
         // IDs start at 2 instead of 1.
         for (int i = 0; i < 1000000; i++)
         {
             DataRow newRow = tblDatas.NewRow();
             string value = "大话西游大话西游大话西游大话西游" + i;

             // Column 0 is ID; columns 1..20 are p1..p20.
             for (int c = 1; c <= payloadColumnCount; c++)
             {
                 newRow[c] = value;
             }
             tblDatas.Rows.Add(newRow);
         }
         #endregion

         // Only the file write is timed, not the table construction above.
         Stopwatch sw = Stopwatch.StartNew();

         CsvHelper.SaveCSV(tblDatas, filePath);

         sw.Stop();

         System.IO.FileInfo fileInfo = new System.IO.FileInfo(filePath);
         log.Info("生成.csv文件," + filePath + ",文件大小" + System.Math.Ceiling((fileInfo.Length / 1024.0) / 1024) + " M" + ",耗时:" + sw.Elapsed);
     }
    View Code

    耗时大概20秒左右,文件大小750M左右。

    2.2、读csv文件

    /// <summary>
    /// Benchmark/demo: loads the CSV file into a DataTable through
    /// <c>CsvHelper.ReadCSV</c> and logs the row count (in units of ten thousand,
    /// integer division) together with the elapsed time.
    /// </summary>
    public void CSV_Read()
    {
        Stopwatch timer = Stopwatch.StartNew();

        // NOTE(review): this path looks like it lost its backslashes (probably
        // C:\Users\Administrator\Desktop\...) - confirm against the writer's path.
        string path = @"C:UsersAdministratorDesktop大数据.csv";

        DataTable table = CsvHelper.ReadCSV(path);
        int tenThousands = table.Rows.Count / 10000;

        log.Info(path + ",文件读取完成,数据条数" + tenThousands + "万,耗时:" + timer.Elapsed);
    }
    View Code

    生成Datatable类型的结果集,耗时10秒左右,测试结果log日志内容如下:

    感谢:https://www.cnblogs.com/fiozhao/p/3225112.html

  • 相关阅读:
    C++学习9 this指针详解
    福建省第八届 Triangles
    UVA 11584 Partitioning by Palindromes
    POJ 2752 Seek the Name, Seek the Fame
    UVA 11437 Triangle Fun
    UVA 11488 Hyper Prefix Sets (字典树)
    HDU 2988 Dark roads(kruskal模板题)
    HDU 1385 Minimum Transport Cost
    HDU 2112 HDU Today
    HDU 1548 A strange lift(最短路&&bfs)
  • 原文地址:https://www.cnblogs.com/PrintY/p/14044598.html
Copyright © 2011-2022 走看看