前些天阅读《各种图像处理类库的比较及选择(The Comparison of Image Processing Libraries)》,对后面的比较结果感觉怪异。对计算密集型运算,C#和C/C++的性能应该差别不大才是。为了探讨问题,做了以下实验。
本实验比较了五种方式进行图像灰度化计算:
(1)EmguCV实现,见 《各种图像处理类库的比较及选择(The Comparison of Image Processing Libraries)》 文中代码
(2)OpenCV/PInvoke实现,见 《各种图像处理类库的比较及选择(The Comparison of Image Processing Libraries)》 文中代码
(3)BitmapData实现,见 《各种图像处理类库的比较及选择(The Comparison of Image Processing Libraries)》 文中代码
(4)Array实现(ArgbImage8),核心代码如下:
(每一个)ImageChannel8 内含1个Byte数组Data。GrayscaleImage8 继承自 ImageChannel8 。
/// <summary>
/// Planar 8-bit ARGB image built on <see cref="ImageChannelSet8"/> with four channels.
/// The channels are exposed in the order A, R, G, B (channel indices 0..3).
/// </summary>
public class ArgbImage8 : ImageChannelSet8
{
    /// <summary>Alpha channel (channel index 0).</summary>
    public ImageChannel8 A { get { return this.Channels[0]; } }

    /// <summary>Red channel (channel index 1).</summary>
    public ImageChannel8 R { get { return this.Channels[1]; } }

    /// <summary>Green channel (channel index 2).</summary>
    public ImageChannel8 G { get { return this.Channels[2]; } }

    /// <summary>Blue channel (channel index 3).</summary>
    public ImageChannel8 B { get { return this.Channels[3]; } }

    /// <summary>Creates an image with four channels of the given size.</summary>
    /// <param name="width">Image width in pixels.</param>
    /// <param name="height">Image height in pixels.</param>
    public ArgbImage8(int width, int height)
        : base(4, width, height)
    {
    }

    /// <summary>
    /// Converts the image to grayscale using the weights 0.299 (red), 0.587 (green) and 0.114 (blue).
    /// </summary>
    /// <returns>A new grayscale image with the same width and height.</returns>
    public GrayscaleImage8 ToGrayscaleImage()
    {
        return ToGrayscaleImage(0.299, 0.587, 0.114);
    }

    /// <summary>
    /// Converts the image to grayscale as a weighted sum of the red, green and blue channels.
    /// </summary>
    /// <param name="rCoeff">Weight applied to the red channel.</param>
    /// <param name="gCoeff">Weight applied to the green channel.</param>
    /// <param name="bCoeff">Weight applied to the blue channel.</param>
    /// <returns>A new grayscale image with the same width and height.</returns>
    public GrayscaleImage8 ToGrayscaleImage(double rCoeff, double gCoeff, double bCoeff)
    {
        GrayscaleImage8 img = new GrayscaleImage8(this.Width, this.Height);
        Byte[] r = R.Data;
        Byte[] g = G.Data;
        Byte[] b = B.Data;
        Byte[] dst = img.Data;

        for (int i = 0; i < r.Length; i++)
        {
            // The cast truncates the weighted sum towards zero.
            dst[i] = (Byte)(r[i] * rCoeff + g[i] * gCoeff + b[i] * bCoeff);
        }

        return img;
    }

    /// <summary>
    /// Builds an <see cref="ArgbImage8"/> by copying every pixel of a bitmap into the four channel arrays.
    /// </summary>
    /// <remarks>Slow: it reads the bitmap one pixel at a time through GetPixel. Left this way for now.</remarks>
    /// <param name="map">Source bitmap; must not be null.</param>
    /// <returns>A new image holding the bitmap's alpha, red, green and blue samples.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="map"/> is null.</exception>
    public static ArgbImage8 CreateFromBitmap(Bitmap map)
    {
        if (map == null) throw new ArgumentNullException("map");

        ArgbImage8 img = new ArgbImage8(map.Width, map.Height);
        Byte[] a = img.A.Data;
        Byte[] r = img.R.Data;
        Byte[] g = img.G.Data;
        Byte[] b = img.B.Data;

        int width = img.Width;
        int height = img.Height;

        for (int row = 0; row < height; row++)
        {
            for (int col = 0; col < width; col++)
            {
                int index = row * width + col;
                Color c = map.GetPixel(col, row);

                // Each component goes to its own channel.
                a[index] = c.A;
                r[index] = c.R;
                g[index] = c.G;
                b[index] = c.B;
            }
        }

        return img;
    }
}
(5)C# 指针/unsafe 实现(测试结果中记作 ArgbImage32,对应下面代码中的 Argb32Image 类),核心代码如下:
/// <summary>
/// Owns a block of unmanaged memory large enough for <c>length</c> elements of type
/// <typeparamref name="T"/>, allocated with Marshal.AllocHGlobal and released with
/// Marshal.FreeHGlobal on dispose or finalization.
/// </summary>
/// <typeparam name="T">Element type; its marshaled size determines the allocation size.</typeparam>
public class UnmanagedMemory<T> : IDisposable where T : struct
{
    // Set once the block has been freed so that it is released only once.
    private bool m_released;

    /// <summary>Allocates room for <paramref name="length"/> elements.</summary>
    /// <param name="length">Number of elements of type <typeparamref name="T"/>.</param>
    public UnmanagedMemory(Int32 length)
    {
        Int32 elementSize = Marshal.SizeOf(typeof(T));
        Int32 totalBytes = elementSize * length;

        Length = length;
        SizeOfType = elementSize;
        ByteCount = totalBytes;
        Start = Marshal.AllocHGlobal(totalBytes);
    }

    /// <summary>Finalizer: frees the block if Dispose was never called.</summary>
    ~UnmanagedMemory()
    {
        Dispose(false);
    }

    /// <summary>Total size of the allocation in bytes (SizeOfType * Length).</summary>
    public Int32 ByteCount { get; private set; }

    /// <summary>Number of elements the block was allocated for.</summary>
    public Int32 Length { get; private set; }

    /// <summary>Address of the first byte of the block.</summary>
    public IntPtr Start { get; private set; }

    /// <summary>Marshaled size of one element, in bytes.</summary>
    public Int32 SizeOfType { get; private set; }

    /// <summary>Frees the block and suppresses finalization.</summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>Frees the unmanaged block the first time it is called; later calls do nothing.</summary>
    /// <param name="disposing">True when called from Dispose(), false when called from the finalizer.</param>
    protected virtual void Dispose(bool disposing)
    {
        if (m_released)
        {
            return;
        }

        m_released = true;
        Marshal.FreeHGlobal(Start);
    }
}
/// <summary>
/// One 32-bit pixel: four Byte fields declared in the order Alpha, Red, Green, Blue.
/// </summary>
public struct Argb32
{
    public Byte Alpha;
    public Byte Red;
    public Byte Green;
    public Byte Blue;
}

/// <summary>
/// A flat buffer of <see cref="Argb32"/> pixels held in unmanaged memory and accessed through raw pointers.
/// </summary>
public class Argb32Image : UnmanagedMemory<Argb32>
{
    // Typed view of the unmanaged block allocated by the base class.
    private unsafe Argb32* m_pixels;

    /// <summary>Allocates a buffer for <paramref name="length"/> pixels.</summary>
    /// <param name="length">Number of pixels.</param>
    public unsafe Argb32Image(int length)
        : base(length)
    {
        m_pixels = (Argb32*)this.Start;
    }

    /// <summary>Pointer to the first pixel.</summary>
    public unsafe Argb32* Pointer
    {
        get { return m_pixels; }
    }

    /// <summary>Reads or writes the pixel at <paramref name="index"/>. No bounds check is performed.</summary>
    public unsafe Argb32 this[int index]
    {
        get { return m_pixels[index]; }
        set { m_pixels[index] = value; }
    }

    /// <summary>
    /// Converts the image to grayscale using the weights 0.299 (red), 0.587 (green) and 0.114 (blue).
    /// </summary>
    /// <returns>A new grayscale image with one byte per pixel.</returns>
    public Grayscale8Image ToGrayscaleImage()
    {
        return ToGrayscaleImage(0.299, 0.587, 0.114);
    }

    /// <summary>
    /// Converts the image to grayscale as a weighted sum of each pixel's red, green and blue values.
    /// </summary>
    /// <param name="rCoeff">Weight applied to the red component.</param>
    /// <param name="gCoeff">Weight applied to the green component.</param>
    /// <param name="bCoeff">Weight applied to the blue component.</param>
    /// <returns>A new grayscale image with <c>Length</c> pixels.</returns>
    public unsafe Grayscale8Image ToGrayscaleImage(double rCoeff, double gCoeff, double bCoeff)
    {
        Grayscale8Image result = new Grayscale8Image(this.Length);
        Argb32* source = Pointer;
        Byte* target = result.Pointer;
        int pixelCount = Length;

        for (int i = 0; i < pixelCount; i++)
        {
            // The cast truncates the weighted sum towards zero.
            target[i] = (Byte)(source[i].Red * rCoeff + source[i].Green * gCoeff + source[i].Blue * bCoeff);
        }

        return result;
    }

    /// <summary>
    /// Builds an <see cref="Argb32Image"/> by copying every pixel of a bitmap, row by row.
    /// </summary>
    /// <param name="map">Source bitmap; must not be null.</param>
    /// <returns>A new image with <c>map.Width * map.Height</c> pixels.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="map"/> is null.</exception>
    public static unsafe Argb32Image CreateFromBitmap(Bitmap map)
    {
        if (map == null)
        {
            throw new ArgumentNullException("map");
        }

        int width = map.Width;
        int height = map.Height;
        Argb32Image image = new Argb32Image(width * height);
        Argb32* cursor = image.Pointer;

        for (int y = 0; y < height; y++)
        {
            for (int x = 0; x < width; x++)
            {
                Color color = map.GetPixel(x, y);

                Argb32 pixel;
                pixel.Alpha = color.A;
                pixel.Red = color.R;
                pixel.Green = color.G;
                pixel.Blue = color.B;

                *cursor = pixel;
                cursor++;
            }
        }

        return image;
    }
}
机器配置:
在每个方法测试前,均运行一段DoSomething()清空高速缓存:
/// <summary>
/// Allocates an array of 20,000,000 integers and stores each element's own index in it.
/// The article runs this before every measurement with the stated aim of clearing the cache.
/// </summary>
/// <returns>The filled array, where element i holds the value i.</returns>
private static int[] DoSomething()
{
    const int ElementCount = 20000000;
    int[] buffer = new int[ElementCount];

    int index = 0;
    while (index < buffer.Length)
    {
        buffer[index] = index;
        index++;
    }

    return buffer;
}
测试结果(每个执行5次,计算耗时总和。单位ms):
图像1——
BitmapData:53
ArgbImage8:80
ArgbImage32:38
EmguCV:68
OpenCV:69
图像2——
BitmapData:25
ArgbImage8:45
ArgbImage32:19
EmguCV:42
OpenCV:45
图像3——
BitmapData:8
ArgbImage8:25
ArgbImage32:6
EmguCV:23
OpenCV:24
图像4——
BitmapData:48
ArgbImage8:76
ArgbImage32:39
EmguCV:67
OpenCV:69
图像5(大图:5000×6000)——
BitmapData:1584
ArgbImage8:1991
ArgbImage32:1229
EmguCV:1545
OpenCV:2817
下面删去ArgbImage8,仅比较剩下的4种(每个执行5次,计算耗时总和。单位ms):
图像6——
BitmapData:17
ArgbImage32:10
EmguCV:25
OpenCV:25
图像7——
BitmapData:88
ArgbImage32:56
EmguCV:69
OpenCV:70
图像8——
BitmapData:41
ArgbImage32:25
EmguCV:40
OpenCV:43
图像5(大图:5000×6000)——
BitmapData:2855
ArgbImage32:1849
EmguCV:1578
OpenCV:2522
下面,把执行顺序颠倒一下,让EmguCV和OpenCV在前面。剩下的2个在后面:
图像8——
EmguCV:41
OpenCV:42
BitmapData:38
ArgbImage32:26
图像9——
EmguCV:32
OpenCV:34
BitmapData:28
ArgbImage32:18
好了,不做试验了。根据上面结果,再考虑到纯C/C++程序比P/Invoke程序性能高一些,可得出这样的结论(在我的机器上):
(1)C#不直接使用指针时,比通过P/Invoke调用的C/C++程序低效一些。
(2)C#直接使用指针,可以写出非常高效的程序,至少比P/Invoke高效。从上面的代码可以看出,C#下指针用起来很舒服,并且编译快。猜想:C#下玩指针+Struct,和C没啥区别。像图像处理这样数据类型简单的程序,非常适合用C#编写。大量使用指针和非托管内存,可以最大化性能,最小化内存占用,最小化GC对程序的影响,达到和C/C++相差无几的性能。
下面尝试直接使用硬件。对图像处理加速最有效果的是GPU,好吧,下面就尝试调用GPU的功能。
如何在无界面的情况下调用GPU呢?
下面是我写的一个测试程序(需要引用XNA):
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.Xna.Framework;
using Microsoft.Xna.Framework.Audio;
using Microsoft.Xna.Framework.Content;
using Microsoft.Xna.Framework.GamerServices;
using Microsoft.Xna.Framework.Graphics;
using Microsoft.Xna.Framework.Input;
using Microsoft.Xna.Framework.Media;
using Microsoft.Xna.Framework.Net;
using Microsoft.Xna.Framework.Storage;

namespace Orc.SmartImage.Xna
{
    /// <summary>
    /// Experiment that obtains an XNA GraphicsDevice through a helper Game instance
    /// and renders into an off-screen render target.
    /// </summary>
    public class Shader
    {
        /// <summary>Game subclass used only to get a GraphicsDevice created.</summary>
        private class GameHelper : Game
        {
            /// <summary>Initializes the game, then attaches a GraphicsDeviceManager and applies its settings.</summary>
            public void Init()
            {
                Initialize();

                GraphicsDeviceManager deviceManager = new GraphicsDeviceManager(this);
                deviceManager.ApplyChanges();
            }
        }

        private GameHelper m_helper;

        /// <summary>The graphics device taken from the helper game.</summary>
        public GraphicsDevice GraphicsDevice { get; set; }

        /// <summary>Creates the helper game and captures its graphics device.</summary>
        /// <param name="hwnd">Window handle; not used by this constructor.</param>
        public Shader(IntPtr hwnd)
        {
            GameHelper helper = new GameHelper();
            helper.Init();

            m_helper = helper;
            GraphicsDevice = helper.GraphicsDevice;
        }

        /// <summary>
        /// Clears a 100x100 off-screen render target to yellow and reads its pixels back into an array.
        /// The array is discarded; the method only checks that the round trip runs.
        /// </summary>
        public void Test()
        {
            GraphicsDevice device = this.GraphicsDevice;

            RenderTarget2D target = new RenderTarget2D(device, 100, 100, 1, SurfaceFormat.Color);
            device.SetRenderTarget(0, target);
            device.Clear(Color.Yellow);
            device.SetRenderTarget(0, null);

            Texture2D texture = target.GetTexture();
            uint[] pixels = new uint[100 * 100];
            texture.GetData(pixels);
        }
    }
}
进一步就是写HLSL了。
============================
离C/C++又远了一步。
附:具体测试代码
(注:那个Shader是我测试GPU计算能否通过的部分。IntPtr hwnd是因为GraphicsDevice构造函数中有这样一个参数,不过后来,我绕了过去,但测试程序这里我没删掉,还留在这里。)
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Drawing;
using System.Drawing.Imaging;
using Orc.SmartImage;
using Emgu.CV;
using Emgu.CV.Structure;
using Emgu.CV.CvEnum;
using Orc.SmartImage.Gpu;
using Orc.SmartImage.UnmanagedObjects;

namespace Orc.SmartImage.PerformanceTest
{
    /// <summary>
    /// Times several grayscale conversions of the same bitmap: EmguCV, OpenCV through P/Invoke,
    /// a LockBits/BitmapData pointer loop, and the unmanaged Argb32Image.
    /// </summary>
    public class PerformanceTestCase0
    {
        /// <summary>
        /// Runs each conversion <paramref name="count"/> times and reports the total elapsed
        /// milliseconds per method, one "Name:ms" line each.
        /// </summary>
        /// <param name="hwnd">Window handle forwarded to the Shader constructor.</param>
        /// <param name="src">Bitmap to convert.</param>
        /// <param name="count">Number of repetitions per method.</param>
        /// <returns>The timing report.</returns>
        public static String Test(IntPtr hwnd, Bitmap src, int count)
        {
            // Left over from the GPU experiment; the instance is not used by the timings below.
            Shader sd = new Shader(hwnd);

            // The ArgbImage8 measurement is disabled in this run. To re-enable it, create the image
            // with ArgbImage8.CreateFromBitmap(src) and time img8.ToGrayscaleImage() like the others.
            Argb32Image img32 = Argb32Image.CreateFromBitmap(src);

            StringBuilder sb = new StringBuilder();
            Stopwatch sw = new Stopwatch();

            DoSomething();
            sw.Reset();
            sw.Start();
            for (int i = 0; i < count; i++)
            {
                ProcessImageWithEmgucv(src);
            }
            sw.Stop();
            sb.AppendLine("EmguCV:" + sw.ElapsedMilliseconds.ToString());

            DoSomething();
            sw.Reset();
            sw.Start();
            for (int i = 0; i < count; i++)
            {
                ProcessImageWithOpencv(src);
            }
            sw.Stop();
            sb.AppendLine("OpenCV:" + sw.ElapsedMilliseconds.ToString());

            DoSomething();
            sw.Reset();
            sw.Start();
            for (int i = 0; i < count; i++)
            {
                Grayscale(src);
            }
            sw.Stop();
            sb.AppendLine("BitmapData:" + sw.ElapsedMilliseconds.ToString());

            DoSomething();
            sw.Reset();
            sw.Start();
            for (int i = 0; i < count; i++)
            {
                img32.ToGrayscaleImage();
            }
            sw.Stop();
            sb.AppendLine("ArgbImage32:" + sw.ElapsedMilliseconds.ToString());

            return sb.ToString();
        }

        /// <summary>
        /// Allocates and fills a 20,000,000-element array; called before each measurement.
        /// </summary>
        /// <returns>The filled array, where element i holds the value i.</returns>
        private static int[] DoSomething()
        {
            int[] data = new Int32[20000000];
            for (int i = 0; i < data.Length; i++)
            {
                data[i] = i;
            }
            return data;
        }

        /// <summary>Converts <paramref name="img"/> to grayscale with its default coefficients.</summary>
        private static GrayscaleImage TestMyConvert(ArgbImage img)
        {
            return img.ToGrayscaleImage();
        }

        /// <summary>
        /// Converts the bitmap to grayscale with EmguCV.
        /// </summary>
        /// <param name="bitmapSource">Bitmap to convert.</param>
        private static void ProcessImageWithEmgucv(Bitmap bitmapSource)
        {
            // Dispose both images so repeated calls do not accumulate unmanaged image buffers.
            using (Image<Bgr, Byte> imageSource = new Image<Bgr, byte>(bitmapSource))
            using (Image<Gray, Byte> imageGrayscale = imageSource.Convert<Gray, Byte>())
            {
            }
        }

        /// <summary>
        /// Converts the bitmap to grayscale by calling OpenCV through P/Invoke.
        /// </summary>
        /// <param name="bitmapSource">Bitmap to convert.</param>
        private static unsafe void ProcessImageWithOpencv(Bitmap bitmapSource)
        {
            // The using block keeps the source image (and the pixel data its header points to)
            // alive until the native call has finished.
            using (Image<Bgr, Byte> imageSource = new Image<Bgr, byte>(bitmapSource))
            {
                IntPtr ptrSource = Marshal.AllocHGlobal(Marshal.SizeOf(typeof(MIplImage)));
                IntPtr ptrGrayscale = IntPtr.Zero;
                try
                {
                    // fDeleteOld must be false: the freshly allocated block holds no valid structure.
                    Marshal.StructureToPtr(imageSource.MIplImage, ptrSource, false);
                    ptrGrayscale = CvInvoke.cvCreateImage(imageSource.Size, IPL_DEPTH.IPL_DEPTH_8U, 1);
                    CvInvoke.cvCvtColor(ptrSource, ptrGrayscale, COLOR_CONVERSION.CV_BGR2GRAY);
                }
                finally
                {
                    // Release the native image and the header copy.
                    if (ptrGrayscale != IntPtr.Zero)
                    {
                        CvInvoke.cvReleaseImage(ref ptrGrayscale);
                    }
                    Marshal.FreeHGlobal(ptrSource);
                }
            }
        }

        /// <summary>
        /// Converts the given bitmap to an 8-bit indexed grayscale bitmap.
        /// </summary>
        /// <param name="bitmapSource">Source image with 3 or 4 channels; the supported pixel formats are
        /// Format24bppRgb, Format32bppRgb and Format32bppArgb.</param>
        /// <returns>The grayscale bitmap, or null if the source is null or has an unsupported format.</returns>
        private static Bitmap Grayscale(Bitmap bitmapSource)
        {
            if (bitmapSource == null)
            {
                return null;
            }

            PixelFormat format = bitmapSource.PixelFormat;
            int bytesPerPixel;
            if (format == PixelFormat.Format24bppRgb)
            {
                bytesPerPixel = 3;
            }
            else if (format == PixelFormat.Format32bppArgb || format == PixelFormat.Format32bppRgb)
            {
                bytesPerPixel = 4;
            }
            else
            {
                return null;
            }

            int width = bitmapSource.Width;
            int height = bitmapSource.Height;
            Rectangle rect = new Rectangle(0, 0, width, height);
            Bitmap bitmapGrayscale = new Bitmap(width, height, PixelFormat.Format8bppIndexed);

            // Set up the palette: entry i is the gray level (i, i, i).
            ColorPalette palette = bitmapGrayscale.Palette;
            for (int i = 0; i < palette.Entries.Length; i++)
            {
                palette.Entries[i] = Color.FromArgb(255, i, i, i);
            }
            bitmapGrayscale.Palette = palette;

            BitmapData dataSource = bitmapSource.LockBits(rect, ImageLockMode.ReadOnly, format);
            try
            {
                BitmapData dataGrayscale = bitmapGrayscale.LockBits(rect, ImageLockMode.WriteOnly, PixelFormat.Format8bppIndexed);
                try
                {
                    int strideSource = dataSource.Stride;
                    int strideGrayscale = dataGrayscale.Stride;
                    unsafe
                    {
                        byte* ptrSource = (byte*)dataSource.Scan0.ToPointer();
                        byte* ptrGrayscale = (byte*)dataGrayscale.Scan0.ToPointer();

                        for (int row = 0; row < height; row++)
                        {
                            // Each bitmap is walked with its own stride; the source rows are wider
                            // than the 8-bit destination rows.
                            byte* ptr1 = ptrSource + strideSource * row;
                            byte* ptr2 = ptrGrayscale + strideGrayscale * row;
                            for (int col = 0; col < width; col++)
                            {
                                // Bytes are read in the order blue, green, red; in the 32-bit formats
                                // the fourth byte is skipped.
                                byte b = ptr1[0];
                                byte g = ptr1[1];
                                byte r = ptr1[2];
                                ptr1 += bytesPerPixel;

                                *ptr2 = (byte)(0.114 * b + 0.587 * g + 0.299 * r);
                                ptr2++;
                            }
                        }
                    }
                }
                finally
                {
                    bitmapGrayscale.UnlockBits(dataGrayscale);
                }
            }
            finally
            {
                bitmapSource.UnlockBits(dataSource);
            }

            return bitmapGrayscale;
        }
    }
}