zoukankan      html  css  js  c++  java
  • BatchFileProcessing(2)实现之解析文件

         前一篇写到BatchFileProcessing的流程设计,这篇总结下解析文件的实现。

         目前我的产品支持用户上传excel,csv,tab delimited等三种文件格式,鉴于office com组件性能太差,所以我们购买了aspose cells组件。下面我们以批量更新商品库存为例,阐述一下解析文件的实现。

         当我们从数据库读取到尚未处理的用户上传文件后,我们会将它下载到服务器磁盘上,然后开始解析。

      文件级别的检查包括以下几项:

      (1)文件是否可以正常打开,如果不能正常打开,记录异常并通知客户。

      (2)文件中的关键列是否有缺失,如果有缺失,记录异常并通知客户。

      (3)文件中的列是否有重复,如果有重复,记录异常并通知客户。

      (4)文件中是否有未在模版中定义的列,如果有未在模版中定义的列,记录异常并通知客户。

         批量更新商品库存的业务检查包括以下几项:

    (1)商品编号是否提供,如果没有提供,记录异常到数据库。

    (2)商品库存是否提供,如果没有提供,使用默认值零;如果提供了,验证是否在有效的范围内,比如0~999999,如果不合法,记录异常到数据库。

    用 Aspose Cells组件读取数据:

      1 using System;
      2 using System.Collections.Generic;
      3 using System.Linq;
      4 using System.Text;
      5 using Aspose.Cells;
      6 
      7 namespace BatchFile.Jobs.FileParsers
      8 {
      9     public class AsposeCellParser : IExcelParser
     10     {
     11         private string m_fileName;
     12         private string m_fileExt;
     13         private FileFormatType m_fileFormatType;
     14         private Workbook m_workbook;
     15         private Worksheet m_worksheet;
     16 
     17         public AsposeCellParser(string fileName, string fileExt)
     18         {
     19             m_fileName = fileName;
     20             m_fileExt = fileExt;
     21             if (FileExtType.IsCSV(m_fileExt))
     22             {
     23                 m_fileFormatType = FileFormatType.CSV;
     24             }
     25             else if (FileExtType.IsExcel2007(m_fileExt))
     26             {
     27                 m_fileFormatType = FileFormatType.Excel2007Xlsx;
     28             }
     29             else if (FileExtType.IsExcel2003(m_fileExt))
     30             {
     31                 m_fileFormatType = FileFormatType.Excel2003;
     32             }
     33             else if (FileExtType.IsTabDelimited(m_fileExt))
     34             {
     35                 m_fileFormatType = FileFormatType.TabDelimited;
     36             }
     37             else
     38             {
     39                 throw new NotSupportedException(string.Format("File extension({0}) cannot be supported.", fileExt));
     40             }
     41         }
     42 
     43         #region IExcelParser
     44 
     45         public string ExcelFile
     46         {
     47             get { return m_fileName; }
     48         }
     49 
     50         public int RowCount
     51         {
     52             get { return m_worksheet.Cells.MaxDataRow; }
     53         }
     54 
     55         public int ColumnCount
     56         {
     57             get { return m_worksheet.Cells.MaxDataColumn; }
     58         }
     59 
     60         public object GetCellValue(int row, int col)
     61         {
     62             return m_worksheet.Cells[row, col].Value;
     63         }
     64 
     65         public void ActiveWorksheet(int activeWorksheetIndex)
     66         {
     67             if (activeWorksheetIndex < 0 || activeWorksheetIndex >= m_workbook.Worksheets.Count)
     68             {
     69                 throw new ArgumentOutOfRangeException("Worksheet index is out of range.");
     70             }
     71             m_worksheet = m_workbook.Worksheets[activeWorksheetIndex];
     72         }
     73 
     74         public void ActiveWorksheet(string activeWorksheetName)
     75         {
     76             if (string.IsNullOrWhiteSpace(activeWorksheetName))
     77             {
     78                 throw new ArgumentException("Worksheet name cannot be null or empty.");
     79             }
     80             m_worksheet = m_workbook.Worksheets[activeWorksheetName];
     81         }
     82 
     83         public void OpenFile()
     84         {
     85             m_workbook = new Workbook();
     86             m_workbook.ConvertNumericData = false;
     87             m_workbook.Open(m_fileName, m_fileFormatType);
     88             m_worksheet = m_workbook.Worksheets[0];
     89         }
     90 
     91         public void OpenFile(int activeWorksheetIndex)
     92         {
     93             OpenFile();
     94             ActiveWorksheet(activeWorksheetIndex);
     95         }
     96 
     97         public void OpenFile(string activeWorksheetName)
     98         {
     99             OpenFile();
    100             ActiveWorksheet(activeWorksheetName);
    101         }
    102 
    103         #endregion
    104     }
    105 }
     
    文件检查与解析
      1  public void Process()
      2         {
      3             m_parser = new AsposeCellParser(m_batchInventoryFile.FileName, m_fileExt);
      4 
      5             //open file
      6             try
      7             {
      8                 m_parser.OpenFile();
      9             }
     10             catch
     11             {
     12                 EmailUtil.Send(m_batchInventoryFile.UserEmail, Consts.Batch_Inventory_File_Can_Not_Open, "File you uploaded cannot be open.");
     13                 return;
     14             }
     15 
     16             //resolve headers
     17             var dictMapping = ConfigManager.BatchFileConfig.BatchInventory.InventoryPropertyMappingDict;
     18             List<ExcelHeaderModel> headerList = new List<ExcelHeaderModel>();
     19             List<string> headerNameList = new List<string>();
     20             int rowIndex = 0;
     21             int colIndex = 0;
     22             for (; colIndex < m_parser.ColumnCount; colIndex++)
     23             {
     24                 string headerName = m_parser.GetCellValue(rowIndex, colIndex).ToString().Trim();
     25                 ExcelHeaderModel header = new ExcelHeaderModel();
     26                 header.ColumnIndex = colIndex;
     27                 header.HeaderName = headerName;
     28                 header.IsDuplicated = headerNameList.Contains(headerName);
     29                 
     30                 headerList.Add(header);
     31                 headerNameList.Add(headerName);
     32             }
     33 
     34             //check missing key column or not
     35             List<string> lostKeyList = new List<string>();
     36             foreach (var key in dictMapping.Keys)
     37             {
     38                 var mapping = dictMapping[key];
     39                 if (mapping.IsKey
     40                     && !headerNameList.Contains(mapping.HeaderName))
     41                 {
     42                     lostKeyList.Add(mapping.HeaderName);
     43                 }
     44             }
     45             if (lostKeyList.Count > 0)
     46             {
     47                 string keys = lostKeyList[0];
     48                 for (int i = 1; i < lostKeyList.Count; i++)
     49                 {
     50                     keys += string.Format(",{0}", lostKeyList[i]);
     51                 }
     52 
     53                 EmailUtil.Send(m_batchInventoryFile.UserEmail,
     54                     Consts.Batch_Inventory_File_Missing_Key,
     55                     string.Format("File you uploaded is missing key(s) {0}.", keys));
     56                 return;
     57             }
     58 
     59             //check exist duplicate columns or not
     60             var duplicateColumnList = headerList.FindAll(h => h.IsDuplicated);
     61             if (duplicateColumnList.Count > 0)
     62             {
     63                 string duplicateColumns = duplicateColumnList[0].HeaderName;
     64                 for (int i = 1; i < duplicateColumnList.Count; i++)
     65                 {
     66                     duplicateColumns += string.Format(",{0}", duplicateColumnList[i].HeaderName);
     67                 }
     68                 EmailUtil.Send(m_batchInventoryFile.UserEmail,
     69                    Consts.Batch_Inventory_File_Having_Duplicated_Column,
     70                    string.Format("File you uploaded has duplicated column(s) {0}.", duplicateColumns));
     71                 return;
     72             }
     73 
     74             //check exist undefined columns or not
     75             var undefinedColumnList = headerList.FindAll(h => !h.IsTemplateDefined);
     76             if (undefinedColumnList.Count > 0)
     77             {
     78                 string undefinedColumns = undefinedColumnList[0].HeaderName;
     79                 for (int i = 1; i < undefinedColumnList.Count; i++)
     80                 {
     81                     undefinedColumns += string.Format(",{0}", undefinedColumnList[i].HeaderName);
     82                 }
     83                 EmailUtil.Send(m_batchInventoryFile.UserEmail,
     84                    Consts.Batch_Inventory_File_Having_Undefined_Column,
     85                    string.Format("File you uploaded has undefined column(s) {0}.", undefinedColumns));
     86                 return;
     87             }
     88 
     89             //mapping header according to configuration
     90             foreach (var header in headerList)
     91             {
     92                 if (dictMapping.Keys.Contains(header.HeaderName))
     93                 {
     94                     header.IsTemplateDefined = true;
     95                     var mapping = dictMapping[header.HeaderName];
     96                     header.IsKey = mapping.IsKey;
     97                     header.IsTransactional = mapping.IsTransactional;
     98                     header.PropertyName = mapping.PropertyName;
     99                     header.DataType = mapping.DataType;
    100                     header.DefaultValue = mapping.DefaultValue;
    101                     header.ColumnName = mapping.ColumnName;
    102                     header.IsSSBNode = mapping.IsSSBNode;
    103                 }
    104             }
    105 
    106             //check and extract business data
    107             if (!Directory.Exists(ConfigManager.BatchFileConfig.BatchInventory.File2Dir))
    108             {
    109                 Directory.CreateDirectory(ConfigManager.BatchFileConfig.BatchInventory.File2Dir);
    110             }
    111             var partitionList = Partitioner.Create(1, m_parser.RowCount + 1);
    112             Parallel.ForEach(partitionList, (p, loopState) =>
    113                 {
    114                     DataTable dt = CreateDataTable(headerList);
    115                     for (int i = p.Item1; i < p.Item2; i++)
    116                     {
    117                         DataRow dr = dt.NewRow();
    118                         XElement xNode = new XElement(ConfigManager.BatchFileConfig.BatchInventory.RootPropertyName);
    119                         List<string> rowErrorList = new List<string>();
    120                        
    121                         foreach (var header in headerList)
    122                         {
    123                             object value = null;
    124                             string strValue = string.Empty;
    125                             if (header.IsKey || header.IsTransactional || header.IsSSBNode)
    126                             {
    127                                 value = m_parser.GetCellValue(i, header.ColumnIndex);
    128                                 strValue = value.ToString().Trim();
    129                                 if (string.IsNullOrEmpty(strValue))
    130                                 {
    131                                     strValue = header.DefaultValue;
    132                                 }
    133                                 //find property's validator and perform validation
    134                                 if (header.IsTemplateDefined)
    135                                 {
    136                                     var property = dictMapping[header.HeaderName];
    137                                     if (!PropertyValidator.Validate(property.Validator, strValue))
    138                                     {
    139                                         rowErrorList.Add(string.Format(property.Validator.Tips, property.PropertyName));
    140                                     }
    141                                 }
    142                             }
    143                             if (header.IsTransactional)
    144                             {
    145                                 dr[header.ColumnName] = value;
    146                             }
    147                             if (header.IsSSBNode)
    148                             {
    149                                 xNode.Add(new XElement(header.PropertyName, new XCData(strValue)));
    150                             }
    151                         }
    152 
    153                         dr["BatchFileID"= m_batchInventoryFile.TransactionNumber;
    154                         dr["RowIndex"= i.ToString();
    155                         if (rowErrorList.Count == 0)
    156                         {
    157                             string fileName = i.ToString() + ".xml";
    158                             string filePath = Path.Combine(ConfigManager.BatchFileConfig.BatchInventory.File2Dir, fileName);
    159                             File.WriteAllText(filePath, xNode.ToString());
    160                             dr["FileName"= fileName;
    161                             dr["CheckResult"= Consts.Success;
    162                         }
    163                         else
    164                         {
    165                             dr["CheckResult"= Consts.Failed;
    166                             XElement xNodeCheckMemo = new XElement("CheckMemoList", from error in rowErrorList
    167                                                                                 select new XElement("CheckMemo", error));
    168                             dr["CheckMemo"=xNodeCheckMemo.ToString();
    169                         }
    170                         dr["HasCheck"= Consts.Yes;
    171                         dt.Rows.Add(dr);
    172 
    173                         if (dt.Rows.Count == ConfigManager.BatchFileConfig.BatchInventory.BatchSize)
    174                         {
    175                             //write to DB
    176                             SQLHelper.BulkCopy(dt);
    177                             dt.Rows.Clear();
    178                         }
    179                     }
    180 
    181                     //process the last batch
    182                     if (dt.Rows.Count > 0)
    183                     {
    184                         //write to DB
    185                         SQLHelper.BulkCopy(dt);
    186                         dt.Rows.Clear();
    187                     }
    188                 });
    189         }
    190 
    /// <summary>
    /// Builds an empty DataTable named after the configured DataTableName. It has
    /// one column per transactional header (typed from the header's configured
    /// DataType) followed by the fixed bookkeeping columns BatchFileID, RowIndex,
    /// FileName, HasCheck, CheckResult and CheckMemo.
    /// </summary>
    /// <param name="headerList">Headers already mapped to the template configuration.</param>
    /// <returns>A new table with columns only, no rows.</returns>
    /// <exception cref="InvalidOperationException">
    /// A transactional header's DataType cannot be resolved to a .NET type.
    /// </exception>
    private DataTable CreateDataTable(List<ExcelHeaderModel> headerList)
    {
        DataTable dt = new DataTable(ConfigManager.BatchFileConfig.BatchInventory.DataTableName);
        foreach (var header in headerList)
        {
            if (!header.IsTransactional)
            {
                continue;
            }

            // Type.GetType returns null for an unknown type name; fail with the
            // offending configuration values instead of an opaque null-argument error.
            Type columnType = Type.GetType(header.DataType);
            if (columnType == null)
            {
                throw new InvalidOperationException(
                    string.Format("Data type '{0}' configured for column '{1}' cannot be resolved.",
                        header.DataType, header.ColumnName));
            }
            dt.Columns.Add(header.ColumnName, columnType);
        }

        dt.Columns.Add("BatchFileID", typeof(int));
        dt.Columns.Add("RowIndex", typeof(int));
        dt.Columns.Add("FileName", typeof(string));
        dt.Columns.Add("HasCheck", typeof(string));
        dt.Columns.Add("CheckResult", typeof(string));
        dt.Columns.Add("CheckMemo", typeof(string));

        return dt;
    }
     
    为了提高文件解析的性能使用了多线程以及SqlBulkCopy等技术。下面是SQLHelper的定义:
     1 public static class SQLHelper
     2     {
     3         private static string m_connectionString = ConfigurationManager.ConnectionStrings["MSSQL"].ConnectionString;
     4         public static string ConnectionString
     5         {
     6             get
     7             {
     8                 return m_connectionString;
     9             }
    10         }
    11 
    12         public static void BulkCopy(DataTable dt, string destTableName, List<SqlBulkCopyColumnMapping> columnMappingList, int timeoutSeconds = 1200)
    13         {
    14             using (SqlConnection conn = new SqlConnection(m_connectionString))
    15             {
    16                 conn.Open();
    17                 SqlTransaction trans = conn.BeginTransaction(IsolationLevel.ReadCommitted, "Bulk copy batch data");
    18                 SqlBulkCopy b = new SqlBulkCopy(conn, SqlBulkCopyOptions.Default, trans);
    19                 b.BulkCopyTimeout = timeoutSeconds;
    20                 b.DestinationTableName = destTableName;
    21                 if (columnMappingList != null)
    22                 {
    23                     foreach (var colMapping in columnMappingList)
    24                     {
    25                         b.ColumnMappings.Add(colMapping);
    26                     }
    27                 }
    28                 b.WriteToServer(dt);
    29                 trans.Commit();
    30                 b.Close();
    31                 conn.Close();
    32             }
    33         }
    34 
    35         public static void BulkCopy(DataTable dt)
    36         {
    37             List<SqlBulkCopyColumnMapping> columnMappingList = new List<SqlBulkCopyColumnMapping>();
    38             foreach (DataColumn col in dt.Columns)
    39             {
    40                 columnMappingList.Add(new SqlBulkCopyColumnMapping
    41                 {
    42                     SourceColumn = col.ColumnName,
    43                     DestinationColumn = col.ColumnName
    44                 });
    45             }
    46             BulkCopy(dt, dt.TableName, columnMappingList);
    47         }
    48     }
    ——致力于微软企业解决方案、项目管理及技术培训
  • 相关阅读:
    mysql 数据迁移方案
    tomcat:run 指定端口号
    idea open gradle project
    tomcat:run命令执行端口号
    域名代理
    stackoverflow慢问题
    chrome 添加 vue tools
    深度学习——结构化机器学习项目(学习策略2)[9]
    深度学习——超参数调试[7]
    深度学习——优化算法[6]
  • 原文地址:https://www.cnblogs.com/fuhongwei041/p/2005478.html
Copyright © 2011-2022 走看看