zoukankan      html  css  js  c++  java
  • HLSL实现简单的图像处理功能

        由于对于dxva2解码得到的数据不宜copy回内存给CPU处理,所以最好的办法是在GPU上直接进行处理。D3D的像素着色器能够对像素直接进行操作,实现点运算极其简单方便,简单的卷积运算效果也非常好。但D3D9的限制也很多,对于过于复杂的图像处理则显得有些不能胜任。

    1.点运算

        点运算用HLSL非常容易实现,几乎是公式怎么写,代码就怎么写。以RGB转灰度图显示为例:

    // Source texture; sampled through YTex.
    texture Tex0 ;
    
    // Selects the gray-level transform applied in Main:
    // 0 = linear, 1 = logarithmic, 2 = power. Any other value yields black.
    int iFlag = 0 ;
    // Gain multiplied into the result of every transform.
    float aValue= 0.0 ;
    // Linear mode: offset, divided by 255 before it is added.
    // Power mode: the exponent. Not read in logarithmic mode.
    float bValue= 0.0 ;
    
    // Sampler over Tex0: linear mip/min/mag filtering, with U and V clamped.
    sampler2D YTex = 
    sampler_state
    {
        Texture = <Tex0> ;
        MipFilter = LINEAR ;
        MinFilter = LINEAR ;
        MagFilter = LINEAR ;
    
        AddressU = CLAMP ;
        AddressV = CLAMP ;
    };
    
    // Pixel shader input: the interpolated texture coordinate from TEXCOORD0.
    struct PS_INPUT
    {
        float2 uvCoords0 : TEXCOORD0 ;
    };
    
    // Pixel shader entry point: converts the sampled texel to a gray level and
    // applies the point transform selected by iFlag.
    //   iFlag == 0 : linear      s = aValue * gray + bValue / 255
    //   iFlag == 1 : logarithm   s = aValue * log(1 + gray)
    //   iFlag == 2 : power       s = aValue * pow(|gray|, bValue)
    //   otherwise  : s stays 0, so the output is black
    // Returns s replicated into r, g and b with alpha fixed at 1.
    float4 Main( PS_INPUT input ) : COLOR0
    {
        // Fetch the texel once instead of once per channel.
        float3 rgb = tex2D( YTex, input.uvCoords0 ).rgb ;

        // RGB -> gray as a weighted channel sum (0.299 / 0.587 / 0.114).
        float gray = dot( rgb, float3( 0.299, 0.587, 0.114 ) ) ;

        float s = 0 ;
        if(iFlag == 0)
        {
            // bValue is given on a 0-255 scale; bring it to the 0-1 color range.
            s = aValue * gray + bValue/255 ;
        }
        else if(iFlag == 1)
        {
            s = aValue * log(1+gray) ;
        }
        else if(iFlag == 2)
        {
            // abs() keeps the base of pow() non-negative.
            s = aValue * pow(abs(gray),bValue) ;
        }

        return float4( s, s, s, 1.0 ) ;
    }

        点运算如此简单是因为GPU是并行运算的,我个人认为可以看成是每一个像素点(BGRA)对应一个线程,这大概就是OpenCL中所谓的数据并行。这是一个非常简单的程序,指令数少,程序结构也很简单,shader 的版本用2.0就可以轻松编过。

    2.卷积运算举例

        指令数较多的情况下,2.0版本的shader就搞不定了,上3.0版本可以做一些简单的卷积运算。以均值滤波(邻域平均)为例(注意:下面的代码是对窗口内的像素求和后取平均,属于均值滤波,而不是需要排序取中间值的中值滤波):

    // Source texture; sampled through YTex.
    texture Tex0 ;
    
    // Transform matrices; MainVS_Screen combines them as World * View * Proj.
    matrix WorldMatrix;
    matrix ViewMatrix;
    matrix ProjMatrix;
    
    // Sampler over Tex0: linear mip/min/mag filtering, with U and V clamped.
    sampler2D YTex = 
    sampler_state
    {
        Texture = <Tex0> ;
        MipFilter = LINEAR ;
        MinFilter = LINEAR ;
        MagFilter = LINEAR ;
    
        AddressU = CLAMP ;
        AddressV = CLAMP ;
    };
    
    // Per-vertex input of MainVS_Screen. It is also the parameter type of
    // MainPS_Screen, which reads only the tex member.
    struct VS_INPUT
      {
        float4 pos    : POSITION;
        float4 color  : COLOR0;
        float2 tex    : TEXCOORD0;
      };
    // 
    // Output of MainVS_Screen: the transformed position plus the color and
    // texture coordinate copied from the input vertex.
    struct VS_OUTPUT
      {
        float4 pos     : POSITION;
        float4 color   : COLOR0;
        float2 tex     : TEXCOORD0;
      };
    
    // Size used to derive the step between filter taps: MainPS_Screen steps by
    // 1 / x horizontally and 1 / y vertically.
    float2 g_v4ScreenSize;
    // Side length of the filter window. MainPS_Screen filters only for odd
    // values from 3 to 11; any other value passes the texel through.
    int ksize = 1 ;
    
    // Rectangle in texture coordinates inside which MainPS_Screen filters
    // (strict comparisons); pixels outside it are passed through unchanged.
    // With the defaults of -1, no non-negative texture coordinate is inside.
    float fLeft = -1.0f ;
    float fTop = -1.0f ;
    float fRight = -1.0f ;
    float fBottom = -1.0f ;
    //--------------------------------- BurTechnique --------------------------------------
    
    
    // Vertex shader: transforms the position by World * View * Proj and
    // forwards the texture coordinate and vertex color unchanged.
    VS_OUTPUT MainVS_Screen( VS_INPUT In )
    {
        // Same composition order as before: (World * View) * Proj.
        float4x4 worldViewProj = mul( mul( WorldMatrix, ViewMatrix ), ProjMatrix );

        VS_OUTPUT result = ( VS_OUTPUT )0;
        result.color = In.color;
        result.tex   = In.tex;
        result.pos   = mul( In.pos, worldViewProj );
        return result;
    }
      
    // Fetches the rgb of one filter tap: 'origin' is the texture coordinate of
    // tap (0, 0) and 'texel' is the step between neighbouring taps.
    float3 SampleTap( float2 origin, float2 texel, int col, int row )
    {
        return tex2D( YTex, origin + texel * float2( col, row ) ).rgb ;
    }

    // Pixel shader: box (mean) filter over a ksize x ksize window centered on
    // In.tex.
    //
    // The texel at In.tex is returned unfiltered when
    //   - ksize is even or outside 3..11,
    //   - In.tex is not strictly inside (fLeft, fTop)-(fRight, fBottom), or
    //   - g_v4ScreenSize has a non-positive component (the tap step would be
    //     a division by zero).
    // Otherwise returns the average rgb of the window with alpha 1.
    //
    // The window is accumulated ring by ring: the 3x3 core first, then the
    // extra columns and rows that grow it to 5x5, 7x7, 9x9 and 11x11. Every
    // loop bound is a literal so the loops can be fully unrolled; the only
    // run-time conditions compare the uniform ksize with a constant.
    float4 MainPS_Screen( VS_INPUT In ) : COLOR0
    {
        float4 center = tex2D( YTex, In.tex ).rgba ;

        // Supported window sizes are the odd values 3, 5, 7, 9 and 11.
        if( ksize < 3 || ksize > 11 || ksize % 2 == 0 )
        {
            return center ;
        }

        // Filter only strictly inside the configured rectangle.
        if(!(In.tex.x < fRight && In.tex.y < fBottom && In.tex.x > fLeft && In.tex.y > fTop))
        {
            return center ;
        }

        // Without a valid size the step between taps cannot be computed.
        if( g_v4ScreenSize.x <= 0.0f || g_v4ScreenSize.y <= 0.0f )
        {
            return center ;
        }

        // Step between neighbouring taps.
        float2 texel = float2( 1.0f / g_v4ScreenSize.x, 1.0f / g_v4ScreenSize.y ) ;

        // Tap (0, 0) lies (ksize - 1) / 2 whole steps up-left of In.tex, which
        // centers the window on In.tex. The earlier x_off * ksize / 2 was
        // evaluated in floating point (1.5 steps for ksize = 3) and shifted the
        // whole window by half a step.
        float2 origin = In.tex - texel * ( 0.5f * ( ksize - 1 ) ) ;

        float3 sum = float3( 0.0f, 0.0f, 0.0f ) ;

        // Declared once so the counters can be reused by every loop below.
        int col ;
        int row ;

        // 3x3 core: columns 0..2, rows 0..2.
        for( col = 0 ; col < 3 ; col++ )
        {
            for( row = 0 ; row < 3 ; row++ )
            {
                sum += SampleTap( origin, texel, col, row ) ;
            }
        }

        if(ksize >= 5)
        {
            // Columns 3..4 over rows 0..4.
            for( col = 3 ; col < 5 ; col++ )
            {
                for( row = 0 ; row < 5 ; row++ )
                {
                    sum += SampleTap( origin, texel, col, row ) ;
                }
            }
            // Rows 3..4 over the columns already covered (0..2).
            for( col = 0 ; col < 3 ; col++ )
            {
                for( row = 3 ; row < 5 ; row++ )
                {
                    sum += SampleTap( origin, texel, col, row ) ;
                }
            }
        }

        if(ksize >= 7)
        {
            // Columns 5..6 over rows 0..6.
            for( col = 5 ; col < 7 ; col++ )
            {
                for( row = 0 ; row < 7 ; row++ )
                {
                    sum += SampleTap( origin, texel, col, row ) ;
                }
            }
            // Rows 5..6 over columns 0..4.
            for( col = 0 ; col < 5 ; col++ )
            {
                for( row = 5 ; row < 7 ; row++ )
                {
                    sum += SampleTap( origin, texel, col, row ) ;
                }
            }
        }

        if(ksize >= 9)
        {
            // Columns 7..8 over rows 0..8.
            for( col = 7 ; col < 9 ; col++ )
            {
                for( row = 0 ; row < 9 ; row++ )
                {
                    sum += SampleTap( origin, texel, col, row ) ;
                }
            }
            // Rows 7..8 over columns 0..6.
            for( col = 0 ; col < 7 ; col++ )
            {
                for( row = 7 ; row < 9 ; row++ )
                {
                    sum += SampleTap( origin, texel, col, row ) ;
                }
            }
        }

        if(ksize >= 11)
        {
            // Columns 9..10 over rows 0..10.
            for( col = 9 ; col < 11 ; col++ )
            {
                for( row = 0 ; row < 11 ; row++ )
                {
                    sum += SampleTap( origin, texel, col, row ) ;
                }
            }
            // Rows 9..10 over columns 0..8.
            for( col = 0 ; col < 9 ; col++ )
            {
                for( row = 9 ; row < 11 ; row++ )
                {
                    sum += SampleTap( origin, texel, col, row ) ;
                }
            }
        }

        // Mean of the ksize * ksize taps; alpha is forced to 1.
        return float4( sum / ( ksize * ksize ), 1.0f ) ;
    }
    
    //--------------------------- Technique ---------------------------
    
    // Single-pass technique: disables light 0 and compiles MainVS_Screen as
    // vs_3_0 and MainPS_Screen as ps_3_0.
    technique BurTechnique
    {
        pass P0
        {    
            LightEnable[0] = false;
            
            VertexShader = compile vs_3_0 MainVS_Screen();
            PixelShader  = compile ps_3_0 MainPS_Screen();
        }
    }

         由于3.0版本的pixel shader似乎不能单独使用(需要与3.0版本的vertex shader配套出现),所以我从点运算时只用像素着色器的做法改为用特效(effect)来实现。HLSL语法中有if语句,也有for语句,可是这个程序却不厌其烦地把所有的采样都逐条列了出来,而没有使用for循环。这是因为在实际使用中发现有一些限制:比如if语句的if(A>B),A与B中必须有一个是常量,就像上面见到的那种形式;for循环的循环条件也是如此,只是第二层j循环的上限可以是第一层循环的i。也就是说,不可以像下面这样用变量作为循环上限:

    for(int i=0;i<ksize;i++)
    {
        for(int j=0;j<ksize1;j++)
            {
        ..........
            }
    }

    以上代码的ksize与ksize1都必须为常数,例外的情况是ksize1可以为第一层循环的 i 。这个问题不知道后续版本的shader有没有,反正我当前使用的版本有。

    另外有一个需要注意的地方是指令数:2.0版本的shader支持的指令数相当少,3.0版本则要多很多,我最长写到了400多条、接近500条时才导致编译失败。还有一个需要提醒的是,3.0版本的shader需要D3D 9.0c及以上版本才支持。如果要做更为复杂的图像处理,条件允许的话建议上D3D11;compute shader虽然我没用过,但从介绍来看,应该可以胜任一些更为复杂的图像处理。

    结合我的上一篇博客(DXVA2解码数据用texture纹理渲染http://www.cnblogs.com/betterwgo/p/6327422.html),就算是实现了从硬解到简单图像处理的完整过程。显卡加速效果非常好,在我的intel 5200上即使4K视频也可以实现比正常播放略快的效果。

    工程源码:http://download.csdn.net/download/qq_33892166/9755307

  • 相关阅读:
    时间复杂度计算
    SQL Server2012编程入门经典(第四版) 读书笔记
    一些编程试题
    Qt 对话框显示控制按钮
    vc++创建文件目录
    配置ubuntu虚拟机备忘
    Qt QThread 多线程使用
    Qt 程序等待多长时间执行Sleep
    Qt 数字和字符处理总结
    c++ 文件utf-8格式
  • 原文地址:https://www.cnblogs.com/betterwgo/p/6403522.html
Copyright © 2011-2022 走看看