zoukankan      html  css  js  c++  java
  • Vulkan中的实时软阴影与硬件优化

    Image

    博客链接

    ShadowMap算法需要解决阴影失真(Shadow Acne)和阴影锯齿(Aliasing)这两个问题:前者可以通过 shadow bias 解决,后者可以使用自定义滤波器(如 pcf、vsm、esm、evsm 等)解决。对于 shadow bias 和 pcf 滤波,vulkan 都提供了对应的硬件能力来加速计算。

    对于shadow bias,如果不使用硬件方法,而在shader中计算偏移量的话,常用的计算方法(learnopengl给出来的)是:

    // Shadow bias computed in the shader: grows as dot(normal, lightDir) drops
    // (surface turning away from the light) and is never smaller than 0.005.
    // NOTE(review): presumably normal and lightDir are unit vectors — confirm.
    float bias = max(0.05f * (1.0 - dot(normal, lightDir)), 0.005f);
    

    当然,渲染时把Cull Mode改为Cull Front,可以进一步弥补深度的比较精度。

    Vulkan光栅化器内置了一个Pipeline State:depthBiasState,开启后会将光栅化得到的深度值偏移一段距离。

    可以配置的参数有depthBiasSlopeFactor、depthBiasConstantFactor以及depthBiasClamp,偏移的计算公式如下:

    Image

    更详细的介绍见spec.

    1. depthBiasSlopeFactor:根据几何体表面梯度大小来缩放的因子。
    2. depthBiasConstantFactor: 恒定偏移量。
    3. depthBiasClamp:偏移量钳制值。

    开启步骤:

    1. 填写VkPipelineRasterizationStateCreateInfo时把depthBiasEnable设为VK_TRUE。
    2. (可选)填写VkPipelineRasterizationStateCreateInfo时把cullMode设为VK_CULL_MODE_FRONT_BIT。
    3. (可选)填写VkPipelineDynamicStateCreateInfo时把VK_DYNAMIC_STATE_DEPTH_BIAS也加进去。
    4. 如果选用了第三步,则在VkCommandBuffer录制时使用vkCmdSetDepthBias设置动态的深度偏移值,否则直接在创建Pipeline时把这个值填好。

    整体的代码流程如下:

    // 1. Enable depth bias for the shadow-map pipeline
    pipe_info.rasterization_state.depthBiasEnable = VK_TRUE;
    
    // ...
    
    // 2. Shadow-map pass: cull front faces
    pipe_info.rasterization_state.cullMode = VK_CULL_MODE_FRONT_BIT;
    pipe_info.depth_stencil_state.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL;
    
    // ...
    
    // 3. Add VK_DYNAMIC_STATE_DEPTH_BIAS to the pipeline's dynamic states
    //    so the bias values are supplied at command-recording time.
    std::vector<VkDynamicState> dynamicStateEnables = {
    	VK_DYNAMIC_STATE_VIEWPORT,
    	VK_DYNAMIC_STATE_SCISSOR,
    	VK_DYNAMIC_STATE_DEPTH_BIAS
    };
    
    // ...
    
    // 4. Command recording for the shadow-depth pass
    pass_shadowdepth->cmd_buf->begin(VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT);
    {
        // ...
        vkCmdSetViewport(*pass_shadowdepth->cmd_buf, 0, 1, &viewport);
    	vkCmdSetScissor(*pass_shadowdepth->cmd_buf, 0, 1, &scissor);
        
        // Constant and slope-scaled depth-bias factors used for this pass.
        static const float depthBiasConstant = 1.25f;
    	static const float depthBiasSlope = 1.75f;
    
    	// Arguments after the command buffer are, in order:
    	// depthBiasConstantFactor, depthBiasClamp (0.0f here), depthBiasSlopeFactor.
    	vkCmdSetDepthBias(
    		*pass_shadowdepth->cmd_buf,
    		depthBiasConstant,
    		0.0f,
    		depthBiasSlope
    	);
        
        // ...
        draw();
    }
    pass_shadowdepth->cmd_buf->end();
    

    这样,便无需再在着色器中计算一次偏移了。

    Image

    接下来是硬件PCF,更准确的来说是双线性的深度比较纹理滤波。

    开启步骤如下:

    1. shadowdepth纹理的采样方式设为Bilinear。
    2. compareEnable设为True。
    3. compareOp设为VK_COMPARE_OP_LESS。
    4. shader中采样器设为sampler2DShadow。
    5. 采样时把ShadowCoord的xyz都传入。

    代码流程如下:

    // 1. cpp
    // Comparison sampler for the shadow-depth texture: linear min/mag filtering
    // with depth comparison enabled, so that sampling returns a filtered
    // comparison result.
    VkSamplerCreateInfo sampler_info{};
    sampler_info.sType         = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
    sampler_info.magFilter     = VK_FILTER_LINEAR;
    sampler_info.minFilter     = VK_FILTER_LINEAR;
    sampler_info.compareEnable = VK_TRUE;
    sampler_info.compareOp     = VK_COMPARE_OP_LESS;
    // Edge handling: coordinates outside [0, 1] read the border color instead
    // of wrapping. VkSamplerCreateInfo names these members addressModeU/V/W
    // and borderColor.
    // NOTE(review): presumably the white border is meant to make lookups
    // outside the shadow map compare as unshadowed — confirm.
    sampler_info.addressModeU  = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
    sampler_info.addressModeV  = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
    sampler_info.addressModeW  = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
    sampler_info.borderColor   = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
    
    // 2. shader...
    // Previously declared as a plain sampler:
    // layout (binding = 5) uniform sampler2D directional_light_shadowdepth;
    layout (binding = 5) uniform sampler2DShadow directional_light_shadowdepth;

    // Single-tap hardware shadow lookup.
    // Projects fragworldpos with the directional light's view/projection
    // matrices, remaps xy from [-1, 1] to [0, 1] and performs one comparison
    // lookup using (uv, depth). The scalar result is replicated into a vec3.
    // NoL and screen_uv are accepted but not used by this function.
    vec3 hardware_shadow_pcf(sampler2DShadow shadow_tex,float NoL,vec3 fragworldpos,vec2 screen_uv)
    {
        vec4 light_clip = ub_directional_light_vp.proj * ub_directional_light_vp.view * vec4(fragworldpos, 1.0);
        vec4 light_ndc = light_clip / light_clip.w;
        vec2 shadow_uv = light_ndc.st * 0.5f + 0.5f;

        // The third component is the reference depth for the comparison.
        float visibility = texture(shadow_tex, vec3(shadow_uv, light_ndc.z));
        return vec3(visibility);
    }
    

    得出的结果和双线性纹理滤波差不多,因为它的滤波核只有 2×2(共 4 个采样点):

    Image

    可以配合泊松盘采样多次,得到一个还不错的软阴影:

    // Table of 25 two-dimensional sample offsets; every component lies within
    // [-1, 1]. Used as per-tap offsets (scaled by texel size and a dilation
    // factor) by the multi-tap shadow filter.
    // NOTE(review): presumably a Poisson-disk distribution, as the name says —
    // the generation method is not shown here.
    const vec2 poisson_disk_25[25] = vec2[](
        vec2(-0.978698, -0.0884121),
        vec2(-0.841121, 0.521165),
        vec2(-0.71746, -0.50322),
        vec2(-0.702933, 0.903134),
        vec2(-0.663198, 0.15482),
        vec2(-0.495102, -0.232887),
        vec2(-0.364238, -0.961791),
        vec2(-0.345866, -0.564379),
        vec2(-0.325663, 0.64037),
        vec2(-0.182714, 0.321329),
        vec2(-0.142613, -0.0227363),
        vec2(-0.0564287, -0.36729),
        vec2(-0.0185858, 0.918882),
        vec2(0.0381787, -0.728996),
        vec2(0.16599, 0.093112),
        vec2(0.253639, 0.719535),
        vec2(0.369549, -0.655019),
        vec2(0.423627, 0.429975),
        vec2(0.530747, -0.364971),
        vec2(0.566027, -0.940489),
        vec2(0.639332, 0.0284127),
        vec2(0.652089, 0.669668),
        vec2(0.773797, 0.345012),
        vec2(0.968871, 0.840449),
        vec2(0.991882, -0.657338)
    );
    
    // 25-tap hardware shadow filter.
    // Projects fragworldpos into the light's shadow-map space, then averages
    // 25 comparison lookups whose UVs are offset by poisson_disk_25 entries
    // scaled by one texel and by `dilation`. The averaged scalar is replicated
    // into a vec3. NoL and screen_uv are accepted but not used here.
    vec3 hardware_shadow_pcf(sampler2DShadow shadow_tex,float NoL,vec3 fragworldpos,vec2 screen_uv,float dilation)
    {
        // Size of one shadow-map texel in UV units (mip level 0).
        vec2 texel_size = 1.0 / vec2(textureSize(shadow_tex, 0).xy);

        vec4 light_clip = ub_directional_light_vp.proj * ub_directional_light_vp.view * vec4(fragworldpos, 1.0);
        vec4 light_ndc = light_clip / light_clip.w;
        vec2 base_uv = light_ndc.st * 0.5f + 0.5f;
        float ref_depth = light_ndc.z;

        float visibility_sum = 0.0;
        for (int tap = 0; tap < 25; tap++)
        {
            vec2 tap_offset = texel_size * poisson_disk_25[tap] * dilation;
            visibility_sum += texture(shadow_tex, vec3(base_uv + tap_offset, ref_depth));
        }

        return vec3(visibility_sum / 25.0f);
    }
    

    下图是25次采样的pcf软阴影,其中dilation = 1.0f:

    Image

    微软则设计了一种类似双线性过滤可缩放滤波核的算法,9次采样就能得到很不错的效果, 效果如下:

    Image

    // 9-tap hardware shadow filter: one centre lookup weighted 2 plus eight
    // rotated offset lookups weighted 1, normalised by 10. The normalised
    // value is squared before being replicated into a vec3.
    // Only the horizontal texel size is used for both axes, i.e. the offsets
    // treat the shadow map as square (the original note mentions 2048).
    // NoL and screen_uv are accepted but not used here.
    vec3 hardware_shadow_pcf_microsoft(sampler2DShadow shadow_tex,float NoL,vec3 fragworldpos,vec2 screen_uv)
    {
        // One texel in UV units along x (1 / shadow-map width).
        float texel = 1.0 / float(textureSize(shadow_tex, 0).x);

        vec4 light_clip = ub_directional_light_vp.proj * ub_directional_light_vp.view * vec4(fragworldpos, 1.0);
        vec4 light_ndc = light_clip / light_clip.w;
        vec2 base_uv = light_ndc.st * 0.5f + 0.5f;

        const float dilation = 1.0f;
        float kernel_step = dilation * texel;
        float d1 = kernel_step * 0.125;
        float d2 = kernel_step * 0.875;
        float d3 = kernel_step * 0.625;
        float d4 = kernel_step * 0.375;

        // Fixed offset applied to the reference depth against shadow acne.
        float ref_depth = light_ndc.z - 0.001f;

        vec2 tap_offsets[8] = vec2[8](
            vec2(-d2,  d1),
            vec2(-d1, -d2),
            vec2( d2, -d1),
            vec2( d1,  d2),
            vec2(-d4,  d3),
            vec2(-d3, -d4),
            vec2( d4, -d3),
            vec2( d3,  d4)
        );

        // Centre tap counts twice; the outer taps are added in table order.
        float weighted_sum = 2.0 * texture(shadow_tex, vec3(base_uv, ref_depth));
        for (int tap = 0; tap < 8; tap++)
        {
            weighted_sum += texture(shadow_tex, vec3(base_uv + tap_offsets[tap], ref_depth));
        }

        float result = weighted_sum / 10.0f;
        return vec3(result * result);
    }
    

    下图为9次采样的软阴影:

    Image

    当然如果不考虑性能还可以结合PCSS一起使用:

    Image

    // Hash-style pseudo-random value in [0, 1): the fractional part of a
    // scaled sine of the dot product between (seed, i) and fixed constants.
    // The same (seed, i) always yields the same value.
    float random(vec3 seed, int i)
    {
        const vec4 hash_weights = vec4(12.9898,78.233,45.164,94.673);
        float phase = dot(vec4(seed,i), hash_weights);
        return fract(sin(phase) * 43758.5453);
    }
    
    // PCSS blocker search.
    // Takes 32 lookups around shadow_coord.st, each offset by a randomly picked
    // poisson_disk_32 entry scaled by `radius` and `texel_size`. Lookups whose
    // value is below (shadow_coord.z - bias) are accumulated as blockers.
    // Returns vec2(average accumulated value, 1.0) when at least one blocker
    // was found, otherwise vec2(1.0, 0.0). NoL is accepted but not used here.
    // NOTE(review): texture() on a sampler2DShadow yields the depth-comparison
    // result rather than the stored depth, so `dist` may not be a real blocker
    // depth — verify whether a non-comparison sampler was intended.
    vec2 pcss_search_blocker(float bias,vec4 shadow_coord,float NoL,float radius,sampler2DShadow shadowdepth_tex,vec2 texel_size) 
    {
        float blocker_sum = 0.0;
        float blocker_hits = 0.0;

        for (int tap = 0; tap < 32; tap++)
        {
            // Pick one of the 32 disk offsets pseudo-randomly per tap.
            int index = int( 32.0 * random(shadow_coord.xyy,tap) ) % 32;
            vec2 sample_uv = vec2(radius) * poisson_disk_32[index] * texel_size + shadow_coord.st;
            float dist = texture(shadowdepth_tex, vec3(sample_uv,shadow_coord.z)).r;

            bool is_blocker = shadow_coord.w > 0.0f && dist < shadow_coord.z - bias;
            if (is_blocker)
            {
                blocker_sum += dist;
                blocker_hits += 1.0;
            }
        }

        // No blocker found: report "unoccluded" through the second component.
        if (blocker_hits == 0.0)
        {
            return vec2(1.0f,0.0f);
        }

        return vec2(blocker_sum / blocker_hits, 1.0);
    }
    
    // PCSS soft shadow.
    // 1. Derives a bias from NoL and converts sample_clip_pos to shadow-map
    //    coordinates (perspective divide, xy remapped to [0, 1]).
    // 2. Runs the blocker search; returns 1.0 immediately when it reports no
    //    blocker, which skips the PCF cost for unshadowed fragments.
    // 3. Otherwise scales the filter size by
    //    max(receiver depth - average blocker depth, 0.1) / average blocker
    //    depth * penumbra_width and returns the shadow_pcf result.
    float shadow_pcss(vec4 sample_clip_pos,float NoL,sampler2DShadow shadowdepth_tex,float penumbra_width,float radius)
    {
        // Size of one shadow-map texel in UV units (mip level 0).
        vec2 texel_size = 1.0 / vec2(textureSize(shadowdepth_tex, 0).xy);
        float bias = max(0.05 * (1.0 - NoL), 0.005);

        vec4 shadow_coord = sample_clip_pos / sample_clip_pos.w;
        shadow_coord.st = shadow_coord.st * 0.5f + 0.5f;

        // x: average blocker depth, y: 1.0 if any blocker was found, else 0.0.
        vec2 blocker = pcss_search_blocker(bias,shadow_coord, NoL, radius,shadowdepth_tex,texel_size);
        if(blocker.y < 0.5f)
        {
            return 1.0f;
        }

        float avg_blocker_depth = blocker.x;
        float receiver_gap = max(shadow_coord.z - avg_blocker_depth,0.1f);
        float penumbra_size = receiver_gap / avg_blocker_depth * penumbra_width;

        return shadow_pcf(bias,shadow_coord, NoL, penumbra_size,shadowdepth_tex,texel_size);
    }
    

    画外音:

    PCSS效果很好,但我以后都不会考虑了,因为实在是太慢了。

     
  • 相关阅读:
    Linux文件权限学习总结
    【转】Hibernate和ibatis的比较
    Spring AOP原理及拦截器
    Spring AOP (下)
    Spring AOP (上)
    SQL语句限定查询知识点总结
    多线程知识点总结
    关于tomcat那些事情
    java.lang.NoClassDefFoundError: org/apache/commons/codec/DecoderException 的解决办法
    cacti 与 nagios 一些总结 【八】
  • 原文地址:https://www.cnblogs.com/Afuness/p/14881877.html
Copyright © 2011-2022 走看看