先看这一篇文章：http://yishan.cc/blogs/gpww/archive/2009/11/03/locality-and-false-sharing.aspx
再尝试运行下面的代码：
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
using System.Threading.Tasks;

namespace TestSum
{
    /// <summary>
    /// Console benchmark that times four ways of summing every element of a
    /// square two-dimensional <c>int</c> array.
    /// </summary>
    internal class Program
    {
        /// <summary>
        /// Signature shared by all summing strategies: takes the array and its
        /// side length and returns the total of all elements.
        /// </summary>
        private delegate int SumDele(int[,] array, int n);

        /// <summary>
        /// Builds a 1024 x 1024 array filled with ones, times each strategy in
        /// turn, then waits for a key press before exiting.
        /// </summary>
        private static void Main(string[] args)
        {
            int n = 1 << 10;
            int[,] array = new int[n, n];

            for (int row = 0; row < n; row++)
            {
                for (int col = 0; col < n; col++)
                {
                    array[row, col] = 1;
                }
            }

            // Run the strategies in the fixed order A, B, C, D.
            SumDele[] strategies = new SumDele[] { SumA, SumB, SumC, SumD };
            foreach (SumDele strategy in strategies)
            {
                Test(strategy, array, n);
            }

            Console.ReadKey();
        }

        /// <summary>
        /// Invokes <paramref name="Sum"/> <paramref name="n"/> times on the array,
        /// accumulating the returned totals, and prints the strategy's method name,
        /// the accumulated total and the elapsed wall-clock seconds.
        /// </summary>
        private static void Test(SumDele Sum, int[,] array, int n)
        {
            int total = 0;
            Stopwatch timer = Stopwatch.StartNew();

            for (int pass = 0; pass < n; pass++)
            {
                total += Sum(array, n);
            }

            timer.Stop();
            Console.WriteLine("{0}: {1}, {2}s", Sum.Method.Name, total, timer.Elapsed.TotalSeconds);
        }

        /// <summary>
        /// Sequential sum in which the inner loop varies the FIRST index, so
        /// consecutive reads are a full row apart in the array's row-major storage.
        /// </summary>
        private static int SumA(int[,] array, int n)
        {
            int total = 0;

            for (int col = 0; col < n; col++)
            {
                for (int row = 0; row < n; row++)
                {
                    total += array[row, col];
                }
            }

            return total;
        }

        /// <summary>
        /// Sequential sum in which the inner loop varies the LAST index, so
        /// consecutive reads touch adjacent elements.
        /// </summary>
        private static int SumB(int[,] array, int n)
        {
            int total = 0;

            for (int row = 0; row < n; row++)
            {
                for (int col = 0; col < n; col++)
                {
                    total += array[row, col];
                }
            }

            return total;
        }

        /// <summary>
        /// Parallel sum: the rows are split into <see cref="Environment.ProcessorCount"/>
        /// contiguous bands (the last band also takes the remainder rows) and every
        /// element is added directly into that band's slot of a shared array.
        /// </summary>
        private static int SumC(int[,] array, int n)
        {
            int workers = Environment.ProcessorCount;
            int[] partials = new int[workers];
            int rowsPerWorker = n / workers;
            int extraRows = n % workers;

            Parallel.For(0, workers, worker =>
            {
                int firstRow = worker * rowsPerWorker;
                int endRow = firstRow + rowsPerWorker;
                if (worker >= workers - 1)
                {
                    // Last band absorbs the rows left over by the integer division.
                    endRow += extraRows;
                }

                for (int row = firstRow; row < endRow; row++)
                {
                    for (int col = 0; col < n; col++)
                    {
                        // NOTE(review): writes hit the shared array on every element;
                        // neighbouring int slots likely share a cache line, so this looks
                        // like a deliberate false-sharing demonstration - confirm before
                        // "optimizing" it.
                        partials[worker] += array[row, col];
                    }
                }
            });

            return partials.Sum();
        }

        /// <summary>
        /// Parallel sum with the same row banding as <c>SumC</c>, but each band
        /// accumulates into a local variable and stores it into the shared array
        /// only once, after its loops finish.
        /// </summary>
        private static int SumD(int[,] array, int n)
        {
            int workers = Environment.ProcessorCount;
            int[] partials = new int[workers];
            int rowsPerWorker = n / workers;
            int extraRows = n % workers;

            Parallel.For(0, workers, worker =>
            {
                int firstRow = worker * rowsPerWorker;
                int endRow = firstRow + rowsPerWorker;
                if (worker >= workers - 1)
                {
                    // Last band absorbs the rows left over by the integer division.
                    endRow += extraRows;
                }

                int bandTotal = 0;
                for (int row = firstRow; row < endRow; row++)
                {
                    for (int col = 0; col < n; col++)
                    {
                        bandTotal += array[row, col];
                    }
                }

                // Single write to the shared array per band.
                partials[worker] = bandTotal;
            });

            return partials.Sum();
        }
    }
}