Why do we need SSSR?
In real-time rendering, reflection probes are a commonly used tool for reflections. They are very cheap, but they come with a problem: a probe captures the scene from a single fixed point, so when the same spot is viewed from different angles the reflection it returns does not match what should actually be seen from each viewpoint. The result is potentially visibly wrong reflections.
What is SSSR?
- SSSR = SSR + PBR
In IBL, a cube map is used to produce specular reflections; SSSR instead builds on SSR and gathers the reflected lighting from screen space.
Features
- Supports both sharp and glossy (blurred) reflections
- The closer the reflected object, the sharper its reflection
- Specular elongation (stretched highlights)
SSR
I covered SSR in an earlier post; if you are not familiar with it, have a look at unity 屏幕空间反射(Screen Space Reflect).
SSR borrows the idea from IBL and samples a mip chain of the reflected image based on roughness. That approach is not physically based, so the resulting reflection can exhibit non-physical artifacts. SSSR borrows the IBL idea as well, but differs in that what it blurs is the distant lighting itself, so the computed result is physically based rather than just blurred image data.
Hi-Z tracing
Hi-Z stores the depth buffer's hierarchy in a mip chain and uses it to accelerate the intersection test for reflection rays.
To build the Hi-Z pyramid, the screen-resolution depth is filtered down level by level, each time keeping the depth closest to the camera within each 2x2 footprint (with reversed-Z the closest depth is the largest value, which is why the kernel below uses max()).
During tracing, the reflection ray's depth is compared against the scene depth at the current mip: if the ray is still in front of the scene surface (no hit yet at this mip), increase the mip level and take a larger step; once the ray reaches or passes the scene surface, decrease the mip level to refine. Keep stepping and repeating until the mip level drops below 0; the scene depth found at that point is the intersection we want. (A sketch of this traversal follows the build code below.)
The mip-chain build is implemented as follows:
#pragma kernel GetHiZ

Texture2D<float4> _SourceTex;
RWTexture2D<float4> _RW_OutputTex;
// Unity inline sampler state: linear filtering, clamp on both axes
SamplerState Smp_ClampU_ClampV_Linear;

// xy: previous mip's texel size
// zw: current mip's texel size
float4 _HiZTexSize;

float SampleSourceTex(float2 uv, float2 offset)
{
    return _SourceTex.SampleLevel(Smp_ClampU_ClampV_Linear, uv + offset * _HiZTexSize.xy, 0).r;
}

[numthreads(8,8,1)]
void GetHiZ (uint3 id : SV_DispatchThreadID)
{
    float2 uv = (float2(id.xy) + 0.5f) * _HiZTexSize.zw;

    // keep the depth closest to the camera within the footprint;
    // with reversed-Z the closest depth is the largest value, hence max()
    float minZ = SampleSourceTex(uv, 0);
    minZ = max(minZ, SampleSourceTex(uv, float2(-1.f, -1.f)));
    minZ = max(minZ, SampleSourceTex(uv, float2(-1.f, 1.f)));
    minZ = max(minZ, SampleSourceTex(uv, float2(1.f, 1.f)));
    minZ = max(minZ, SampleSourceTex(uv, float2(1.f, -1.f)));

    _RW_OutputTex[id.xy] = minZ;
}
// First write the scene depth into _HiZDepthRT[0] (mip 0), then generate the mip chain from that depth texture
private const int _maxMipMapLevels = 7;
private RenderTargetIdentifier[] _HiZDepthRT = new RenderTargetIdentifier[_maxMipMapLevels];
private static int[] _HiZDepthTexID = new int[_maxMipMapLevels];
void DoHizDepth(CommandBuffer cmd, ref RenderingData renderingData, ScriptableRenderContext context, RenderTargetIdentifier targetRT)
{
var computeShader = Resources.Load<ComputeShader>("HiZ/CS_HiZ");
if (computeShader == null) return;
var tempDesc = _descriptor;
tempDesc.enableRandomWrite = true;
tempDesc.colorFormat = RenderTextureFormat.RFloat;
tempDesc.useMipMap = true;
tempDesc.autoGenerateMips = false;
Vector2Int currTexSize = new Vector2Int(_descriptor.width, _descriptor.height);
Vector2Int lastTexSize = currTexSize;
var lastHizDepthRT = targetRT;
// build mips 1..N-1, each half the resolution of the previous level
for (int i = 1; i < _maxMipMapLevels; ++i)
{
currTexSize.x /= 2;
currTexSize.y /= 2;
tempDesc.width = currTexSize.x;
tempDesc.height = currTexSize.y;
_HiZDepthTexID[i] = Shader.PropertyToID("_HiZDepthTex" + i);
_HiZDepthRT[i] = new RenderTargetIdentifier(_HiZDepthTexID[i]);
cmd.GetTemporaryRT(_HiZDepthTexID[i], tempDesc, FilterMode.Point);
int kernelID = computeShader.FindKernel("GetHiZ");
computeShader.GetKernelThreadGroupSizes(kernelID, out uint x, out uint y, out uint z);
cmd.SetComputeTextureParam(computeShader, kernelID, Shader.PropertyToID("_SourceTex"), lastHizDepthRT);
cmd.SetComputeTextureParam(computeShader, kernelID, Shader.PropertyToID("_RW_OutputTex"), _HiZDepthRT[i]);
cmd.SetComputeVectorParam(computeShader, Shader.PropertyToID("_HiZTexSize"),
new Vector4(1f / lastTexSize.x, 1f / lastTexSize.y, 1f / currTexSize.x, 1f / currTexSize.y));
cmd.DispatchCompute(computeShader, kernelID,
Mathf.CeilToInt((float)currTexSize.x / x),
Mathf.CeilToInt((float)currTexSize.y / y),
1);
// copy this level into the matching mip of the full pyramid
cmd.CopyTexture(_HiZDepthRT[i], 0, 0, targetRT, 0, i);
lastTexSize = currTexSize;
lastHizDepthRT = _HiZDepthRT[i];
}
for (int i = 1; i < _maxMipMapLevels; ++i)
{
cmd.ReleaseTemporaryRT(_HiZDepthTexID[i]);
}
}
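The ray-marching loop itself is not part of the build code above. What follows is a minimal sketch of the mip-walking traversal described earlier, assuming reversed-Z, the pyramid bound as _HiZDepthTex with a point sampler, and _HiZTexSize.zw holding the mip-0 texel size; the function and parameter names are illustrative, not the project's own:

// Sketch of Hi-Z traversal. rayPos/rayDir are in screen space:
// xy = uv, z = device depth (reversed-Z, larger = closer to the camera).
// Returns xy = hit uv, z = hit depth, w = 1 on a hit, 0 on a miss.
float4 HiZTrace(float3 rayPos, float3 rayDir, int maxSteps, int maxLevel)
{
    int level = 0;
    float3 p = rayPos;
    for (int iter = 0; iter < maxSteps; ++iter)
    {
        // advance roughly one texel of the current mip along the ray
        float2 texel = _HiZTexSize.zw * exp2((float)level);
        float t = length(texel) / max(length(rayDir.xy), 1e-6);
        float3 next = p + rayDir * t;

        float sceneZ = _HiZDepthTex.SampleLevel(Smp_ClampU_ClampV_Point, next.xy, level).r;
        if (next.z > sceneZ)
        {
            // ray is still in front of the scene: take the step and coarsen
            p = next;
            level = min(level + 1, maxLevel);
        }
        else
        {
            // ray reached or passed a surface: back off one mip to refine
            level--;
            if (level < 0)
                return float4(next.xy, sceneZ, 1.f); // converged at mip 0
        }
    }
    return float4(0.f, 0.f, 0.f, 0.f); // miss
}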
Importance Sampling
To get a blurred (glossy) lighting result, one option is to jitter the surface normal; another is to jitter the reflected ray. In a BRDF, the term directly tied to the highlight is the normal distribution function, which determines whether the highlight is sharp or blurred. To get a more physically based result, we can use the GGX NDF to compute microfacet normals from the roughness, which achieves the normal-jittering effect.
The GGX NDF here acts as an importance-sampling distribution, and the essence of importance sampling lies in the probability density function: to approximate the brute-force (exhaustive) computation with few samples, each sample must be weighted (by the reciprocal of its pdf), and the weighted samples are then summed and averaged to form the final result.
The industry-standard GGX NDF is: D_{GGX}(h) = \frac{\alpha^2}{\pi\left((\alpha^2 - 1)\cos^2\theta_h + 1\right)^2}
where α is the remapped roughness (the implementation below uses α = roughness², the common Disney remapping) and θ_h is the angle between the half vector (i.e. the sample direction) and the macro-surface normal.
The corresponding PDF is: p(h) = \frac{\alpha^2 \cos\theta_h}{\pi\left((\alpha^2 - 1)\cos^2\theta_h + 1\right)^2} = D_{GGX}(h)\cos\theta_h
Inverting the CDF gives \cos\theta_h = \sqrt{\frac{1 - E}{1 + (\alpha^2 - 1)E}}, where E is a random number uniformly distributed on [0, 1].
With \phi = 2\pi E_1 (a second uniform random number), the resulting microfacet normal (half vector) in tangent space is: h = (\sin\theta_h\cos\phi,\ \sin\theta_h\sin\phi,\ \cos\theta_h)
The implementation is as follows:
float4 ImportanceSampleGGX(float2 Xi, float Roughness)
{
    float m = Roughness * Roughness;   // alpha (Disney remapping: roughness^2)
    float m2 = m * m;                  // alpha^2

    float Phi = 2 * PI * Xi.x;
    // inverse-CDF sampling of cos(theta_h); see the formula above
    float CosTheta = sqrt((1.0 - Xi.y) / (1.0 + (m2 - 1.0) * Xi.y));
    float SinTheta = sqrt(max(1e-5, 1.0 - CosTheta * CosTheta));

    // half vector (sample direction) in tangent space
    float3 H;
    H.x = SinTheta * cos(Phi);
    H.y = SinTheta * sin(Phi);
    H.z = CosTheta;

    // GGX NDF at H: d = (alpha^2 - 1) * cos^2(theta_h) + 1
    float d = (CosTheta * m2 - CosTheta) * CosTheta + 1;
    float D = m2 / (PI * d * d);
    float pdf = D * CosTheta;          // pdf of the sampled direction

    return float4(H, pdf);
}
Multi sample
If too few importance samples are taken, the result is full of noise. How do we denoise? Once each pixel's reflection ray has been intersected with the scene, a pixel can borrow its neighbours' hit data and treat those rays as if it had fired them itself, gaining extra samples almost for free.
The implementation is as follows:
static const int2 offset[9] =
{
    int2(-2, -2), int2(0, -2), int2(2, -2),
    int2(-2,  0), int2(0,  0), int2(2,  0),
    int2(-2,  2), int2(0,  2), int2(2,  2)
};
// per-pixel random rotation from a 4x4 blue-noise tile
float2 ditherUV = fmod(i.positionCS.xy, 4);
float2 jitter = _BlueNoiseTex.SampleLevel(Smp_ClampU_ClampV_Linear, ditherUV / 4.f + float2(0.5f / 4.f, 0.5f / 4.f), 0).xy;
float2x2 offsetRotationMat = float2x2(jitter.x, jitter.y, -jitter.y, jitter.x); // 2D rotation
// offset the uv and reuse the neighbours' hit data
for(int k = 0; k < 4; ++k)
{
    float2 offsetUV = mul(offsetRotationMat, offset[k] * _ResolvedTexSize.zw);
    float2 neighborUV = uv + offsetUV;
    float4 hitData = _SSRHitData.SampleLevel(Smp_ClampU_ClampV_Linear, neighborUV, 0);
    float2 hitUV = hitData.rg; // where the neighbour's ray hit
    float hitZ = hitData.b;    // depth of the hit point
    float pdf = hitData.a;     // pdf the neighbour's sample was drawn with
    float4 hitPositionCS = GetPositionCS(hitUV, hitZ);
    float4 hitPositionVS = GetPositionVS(hitPositionCS);
    //...
}
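The elided loop body ("//...") is where each borrowed hit becomes a colour contribution. The project code is not reproduced here, but a standard stochastic-SSR resolve weights every reused sample by the local BRDF over the pdf the sample was drawn with, along these lines (a sketch: LocalBRDF, _SceneColorTex, resolvedColor and weightSum are illustrative names, with the accumulators declared before the loop):

// inside the loop: turn the neighbour's hit into a weighted sample
float3 hitColor = _SceneColorTex.SampleLevel(Smp_ClampU_ClampV_Linear, hitUV, 0).rgb;
// the sample was drawn with probability `pdf`, so divide it out and
// re-weight by this pixel's BRDF toward the hit direction
float brdfWeight = LocalBRDF(normalWS, viewDirWS, hitPositionVS) / max(pdf, 1e-5);
resolvedColor += float4(hitColor, 1.f) * brdfWeight;
weightSum += brdfWeight;
// after the loop: resolvedColor /= max(weightSum, 1e-5);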
But multi-sampling has a problem of its own: every point has a different PDF. If θ_h (the angle between the half vector, i.e. the sample direction, and the macro normal) differs greatly between the reused points, their PDFs also differ greatly, and the reflection result ends up noisy.
To avoid the large PDF differences caused by large differences in this angle, the random number can be truncated and remapped (the lerp toward zero below), which cuts off the extreme samples and produces a flatter, better-behaved PDF.
The implementation is as follows:
float2 ditherUV = fmod(i.positionCS.xy, 4);
float2 jitter = _BlueNoiseTex.SampleLevel(Smp_ClampU_ClampV_Linear, ditherUV / 4.f + float2(0.5 / 4.f, 0.5f / 4.f), 0).xy;
// pull Xi.y toward 0: truncates the extreme samples and flattens the pdf
jitter.y = lerp(jitter.y, 0.f, _BRDFBias);
float4 halfVector = TransformTangentToView(normalWS, ImportanceSampleGGX(jitter, 1 - smoothness));
Spatial filter
Since the problem comes from the normals, can we simply average over that region? Yes, and that is exactly the idea behind the spatial filter, sketched below.
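A minimal sketch of that idea, assuming the filter is a weighted average of the neighbouring resolved samples (_ResolvedTex and _ResolvedTexSize follow the earlier snippets, while the Gaussian-style weight below stands in for the BRDF/pdf weighting a production pass would use):

// Spatial filter sketch: average the resolved reflection over a small
// neighbourhood so that per-pixel pdf differences even out.
float4 SpatialFilter(float2 uv)
{
    float4 colorSum = 0;
    float weightSum = 0;
    for (int x = -1; x <= 1; ++x)
    {
        for (int y = -1; y <= 1; ++y)
        {
            float2 sampleUV = uv + float2(x, y) * _ResolvedTexSize.zw;
            float4 c = _ResolvedTex.SampleLevel(Smp_ClampU_ClampV_Linear, sampleUV, 0);
            float w = exp(-(x * x + y * y)); // assumed weight, not the project's
            colorSum += c * w;
            weightSum += w;
        }
    }
    return colorSum / max(weightSum, 1e-5);
}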
Temporal filter
If rays can be reused in space, can they also be reused in time? Of course; this is the familiar TAA approach: store the previous frames' reflection results and lerp between the current frame and that history.
void Temporalfilter(PSInput i, out PSOutput o)
{
float2 uv = i.uv;
float hitDepth = _SSRHitData.SampleLevel(Smp_ClampU_ClampV_Linear, uv, 0).b;
float roughness = GetRoughness(GetSmoothness(uv));
float3 normalWS = GetNormalWS(uv);
// choose between the surface motion vector and the reflected-ray motion
// vector; upward-facing surfaces (floors) favour the ray's velocity
float2 depthVelocity = _MotionVectorTexture.SampleLevel(Smp_ClampU_ClampV_Linear, uv, 0).rg;
float2 rayVelocity = GetCameraMotionVector(1 - hitDepth, uv, Matrix_I_VP, _Pre_Matrix_VP, Matrix_VP);
float Velocity_Weight = saturate(dot(normalWS, float3(0, 1, 0)));
float2 velocity = lerp(depthVelocity, rayVelocity, Velocity_Weight);
float2 du = float2(_TAATexSize.z, 0);
float2 dv = float2(0, _TAATexSize.w);
// neighbourhood min/max of the current frame, used to clamp the history
float4 minColor = 1e10, maxColor = 0;
for(int x = -1; x <= 1; ++x)
{
    for(int y = -1; y <= 1; ++y)
    {
        float4 currColor = _ResolvedTex.SampleLevel(Smp_ClampU_ClampV_Linear, uv + du * x + dv * y, 0);
        minColor = min(minColor, currColor);
        maxColor = max(maxColor, currColor);
    }
}
// shrink the clamp box around its centre by _TAAScale
float4 averageColor = (minColor + maxColor) * 0.5f;
minColor = (minColor - averageColor) * _TAAScale + averageColor;
maxColor = (maxColor - averageColor) * _TAAScale + averageColor;
// reproject the history sample and clamp it into the neighbourhood box
float4 TAAPreColor = _TAAPreTex.SampleLevel(Smp_ClampU_ClampV_Linear, uv - velocity, 0);
TAAPreColor = clamp(TAAPreColor, minColor, maxColor);
float4 TAACurrColor = _ResolvedTex.SampleLevel(Smp_ClampU_ClampV_Linear, uv, 0);
// rougher surfaces use the configurable history weight; smooth surfaces
// fall back to a fixed high weight
float TAAWeight = 0.f;
if(roughness > 0.1)
{
    TAAWeight = _TAAWeight;
}
else
{
    TAAWeight = 0.92f;
}
// fade the history out as the screen-space velocity grows
float weight = saturate(clamp(TAAWeight, 0.f, 0.96f) * (1.f - length(velocity) * 8));
float4 reflectColor = lerp(TAACurrColor, TAAPreColor, weight);
o.color = reflectColor;
}
Downsampling
To optimize performance further, the render targets can be downsampled.
Final result
At 1K resolution with 4x downsampling, the pass costs 1.793 ms.
Project
https://github.com/chenglixue/unity-Stochastic-Screen-Space-Reflections/tree/main