zoukankan      html  css  js  c++  java
  • .NET Core TPL 数据流 BatchBlock管道封装

    场景:例如需要处理数据库中的大量数据:先查询出来并对数据进行处理,再每1000条合成一个批次,最后批量插入到另一张表。

    作用:通过数据流进行批处理——利用多核并行提高处理效率,并把逐条插入改为批量插入以提高写入效率。

    定义数据Model 

     /// <summary>
     /// Sample data model: an employee with an ID and a first/last name.
     /// </summary>
     public class Employee
     {
         public int EmployeeID { get; set; }
         public string LastName { get; set; }
         public string FirstName { get; set; }

         // A random number generator that helps to generate Employee property values.
         // It is seeded with a constant (42), so every run yields the same name sequence.
         static readonly Random rand = new Random(42);

         // Possible random first names.
         static readonly string[] firstNames = { "Tom", "Mike", "Ruth", "Bob", "John" };
         // Possible random last names.
         static readonly string[] lastNames = { "Jones", "Smith", "Johnson", "Walker" };

         /// <summary>
         /// Creates an Employee whose first and last names are picked at random
         /// from the fixed name tables.
         /// </summary>
         /// <param name="employeeId">
         /// Value stored in <see cref="EmployeeID"/>. Defaults to -1, so calling the
         /// method without an argument still yields an employee with ID -1.
         /// </param>
         /// <returns>A new Employee instance.</returns>
         public static Employee Random(int employeeId = -1)
         {
             return new Employee
             {
                 EmployeeID = employeeId,
                 // The last name is drawn before the first name; keeping this order
                 // keeps the seeded sequence of generated names stable.
                 LastName = lastNames[rand.Next() % lastNames.Length],
                 FirstName = firstNames[rand.Next() % firstNames.Length]
             };
         }
     }
    View Code

    定义数据库操作(这里简化)

     /// <summary>
     /// Simplified stand-in for the database layer: it only logs each batch it receives.
     /// </summary>
     public class EmployeeRepository
     {
         /// <summary>
         /// Number of times <see cref="InsertEmployees"/> has been invoked.
         /// </summary>
         private static int Count;

         /// <summary>
         /// Writes one console line for the batch: the current managed thread ID, the
         /// running call number, the batch size and the JSON-serialized employee IDs.
         /// </summary>
         /// <param name="employees">The batch that would be inserted.</param>
         public static void InsertEmployees(Employee[] employees)
         {
             // The locals are computed in the same order as they appear in the message.
             var threadId = Thread.CurrentThread.ManagedThreadId;
             var callNumber = Interlocked.Increment(ref Count);
             var total = employees.Count();
             var idsJson = JsonConvert.SerializeObject(employees.Select(p => p.EmployeeID));

             Console.WriteLine($"线程Id:{threadId} 第{callNumber}次  总数:{total},数据:{idsJson}开始执行批量插入");
             //todo: db op
         }
     }
    View Code

    数据流批处理封装类

     /// <summary>
     /// Small wrapper around a TPL Dataflow pipeline: posted items are grouped by a
     /// <c>BatchBlock</c> into arrays of <c>batchSize</c> elements and every array is
     /// handed to a caller-supplied action running in an <c>ActionBlock</c>.
     /// Optionally, a timer flushes an incomplete batch when no new item has arrived
     /// for a configurable number of seconds.
     /// </summary>
     public class BatchBlockPipeline<T>
     {
         /// <summary>
         /// Groups posted items into batches.
         /// </summary>
         private readonly BatchBlock<T> _batchBlock;
         /// <summary>
         /// Runs the caller's action on each batch.
         /// </summary>
         private readonly ActionBlock<T[]> _actionBlock;
         /// <summary>
         /// True when the timed flush is enabled.
         /// </summary>
         private readonly bool _timeTrigger;
         /// <summary>
         /// Entry block used only in timed mode; it re-arms the timer for every item
         /// and forwards the item unchanged to the batch block. Null otherwise.
         /// </summary>
         private readonly TransformBlock<T, T> _transformBlock;
         /// <summary>
         /// Timer that flushes the pending batch in timed mode. Null otherwise.
         /// </summary>
         private readonly Timer _timer;
         /// <summary>
         /// Total number of items contained in the batches the action has finished processing.
         /// </summary>
         private int _processedCount;

         /// <summary>
         /// Builds and links the pipeline.
         /// </summary>
         /// <param name="batchSize">Number of items per batch.</param>
         /// <param name="action">Delegate invoked with each batch.</param>
         /// <param name="boundedCapacity">Maximum number of items buffered by the entry/batch blocks. Defaults to int.MaxValue (2147483647).</param>
         /// <param name="maxDegreeOfParallelism">Maximum number of batches processed concurrently. Defaults to 1.</param>
         /// <param name="timeTrigger">
         /// Timed flush, in seconds. 0 (default) disables it; a value greater than 0 flushes
         /// the pending batch once that many seconds have passed since the last item arrived.
         /// </param>
         /// <exception cref="ArgumentNullException"><paramref name="action"/> is null.</exception>
         public BatchBlockPipeline(int batchSize, Action<T[]> action, int boundedCapacity = int.MaxValue, int maxDegreeOfParallelism = 1, int timeTrigger = 0)
         {
             if (action == null)
             {
                 throw new ArgumentNullException(nameof(action));
             }

             _batchBlock = new BatchBlock<T>(batchSize, new GroupingDataflowBlockOptions { BoundedCapacity = boundedCapacity });
             _actionBlock = new ActionBlock<T[]>(data =>
             {
                 action(data);
                 // Batches may run in parallel, so the running total is updated atomically.
                 Interlocked.Add(ref _processedCount, data.Length);
             }, new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism });

             // PropagateCompletion forwards both completion and faults to the action block,
             // so no manual continuation is required.
             _batchBlock.LinkTo(_actionBlock, new DataflowLinkOptions { PropagateCompletion = true });

             if (timeTrigger > 0)
             {
                 _timeTrigger = true;

                 // Created disarmed; the first posted item arms it.
                 _timer = new Timer(state => _batchBlock.TriggerBatch(), null, Timeout.Infinite, Timeout.Infinite);

                 _transformBlock = new TransformBlock<T, T>(model =>
                 {
                     // Each item pushes the one-shot flush back by timeTrigger seconds.
                     _timer.Change(TimeSpan.FromSeconds(timeTrigger), Timeout.InfiniteTimeSpan);
                     return model;
                 }, new ExecutionDataflowBlockOptions { BoundedCapacity = boundedCapacity });
                 _transformBlock.LinkTo(_batchBlock, new DataflowLinkOptions { PropagateCompletion = true });

                 // The batch block completes only after the transform block has finished,
                 // so no further _timer.Change call can happen once the timer is disposed here.
                 _batchBlock.Completion.ContinueWith(delegate { _timer.Dispose(); });
             }
         }

         /// <summary>
         /// Posts one item into the pipeline.
         /// </summary>
         /// <param name="model">The item to enqueue.</param>
         /// <returns>True when the entry block accepted the item; otherwise false.</returns>
         public bool PostValue(T model)
         {
             if (!_timeTrigger)
             {
                 return _batchBlock.Post(model);
             }
             return _transformBlock.Post(model);
         }

         /// <summary>
         /// Forces the batch block to emit the items it currently holds, e.g. when
         /// fewer than batchSize items remain.
         /// </summary>
         public void TriggerBatch()
         {
             _batchBlock.TriggerBatch();
         }

         /// <summary>
         /// Returns the total number of items in the batches processed so far.
         /// The call does not block and does not remove data from the pipeline.
         /// </summary>
         /// <returns>Count of items whose batch has been handled by the action.</returns>
         public int GetBatchSum()
         {
             return Volatile.Read(ref _processedCount);
         }

         /// <summary>
         /// Signals that no more items will be posted. Completion is requested on the
         /// entry block of the active mode and propagates through the links.
         /// </summary>
         public void Close()
         {
             if (_timeTrigger)
             {
                 _transformBlock.Complete();
             }
             else
             {
                 // _transformBlock is null in this mode and must not be touched.
                 _batchBlock.Complete();
             }
         }
     }
    View Code

    测试方法

     /// <summary>
     /// Console driver: feeds 100 random employees into a pipeline that batches them in tens.
     /// </summary>
     class Program
     {
         static void Main(string[] args)
         {
             var batchDataPipeline = new BatchBlockPipeline<Employee>(10, EmployeeRepository.InsertEmployees);

             for (int i = 0; i < 100; i++)
             {
                 // Use the loop index as the ID so the records inside a batch can be told apart.
                 var employee = Employee.Random();
                 employee.EmployeeID = i;
                 batchDataPipeline.PostValue(employee);
             }

             // Batches are handled asynchronously; wait for a key press before exiting.
             Console.ReadKey();
         }
     }
    View Code

    1.测试:100条数据,10个一批次插入数据库,并行数量1

    //var batchDataPipeline = new BatchBlockPipeline<Employee>(10, EmployeeRepository.InsertEmployees);

    2.测试:100条数据,10个一批次插入数据库,并行数量1,最大限制处理20个

    // var batchDataPipeline = new BatchBlockPipeline<Employee>(10, EmployeeRepository.InsertEmployees,20);

     3.测试 100条数据,10个一批次插入数据库,多核多线程并行数量10

    //var batchDataPipeline = new BatchBlockPipeline<Employee>(10, EmployeeRepository.InsertEmployees, maxDegreeOfParallelism: 10);

     4.测试 100条数据,10个一批次插入数据库,定时触发3秒触发一次 (模拟不定期post数据,或者最后剩余的数据达不到batchsize的情况)

    //var batchDataPipeline = new BatchBlockPipeline<Employee>(10, EmployeeRepository.InsertEmployees, timeTrigger: 3);

    //for (int i = 0; i < 100; i++)
    //{
    // batchDataPipeline.PostValue(Employee.Random(i));
    // if (i % 3 == 0|| i % 7 == 0 || i % 9 == 0)
    // {
    // Thread.Sleep(5000);
    // }
    //}

    官方TPL数据流之BatchBlock例子链接     DemoGitHub地址

  • 相关阅读:
    [HAOI2015] 数组游戏
    [HAOI2015] 数字串拆分
    [HAOI2015] 按位或
    [HAOI2009] 毛毛虫
    [HAOI2009] 巧克力
    [HAOI2011] Problem C
    [HAOI2011] 防线修建
    [HAOI2011] Problem A
    [HAOI2010] 最长公共子序列
    [HAOI2010] 工厂选址
  • 原文地址:https://www.cnblogs.com/TeemoHQ/p/13225841.html
Copyright © 2011-2022 走看看