思路

观察下面这副图可以发现，在明亮处光很明显，暗处(阴影中)没有明显的光，且越暗光越不明显
为了还原这一现象，可以根据目标pixel的阴影值来计算亮度。但如何营造光的体积感呢？这就需要借鉴光线追踪的思想——Ray Marching（光线步进）！

与光追不同的是，光追是每个pixel，在场景中发射一根射线并不断弹射，当弹射出场景或达到最大弹射次数时，累加每次弹射计算得到的颜色，最终该pixel返回该颜色值；而rayMarching特别之处在于，它不会弹射，而是每个pixel发射一根射线，该射线每次行走一定的距离step，每行走一次计算当前位置的阴影值并累加，当碰到遮挡物体或达到最大距离，就终止步进，最终得到的结果即为累加的阴影值

如下图所示，红色虚线代表光线走到过的位置，当走到这些位置时就采样阴影图并得到对应的阴影值，最后累加

实现

采样阴影图

在前面说过，得到的光照与阴影值有关，所以这里需要采样阴影图

实现

#pragma multi_compile _ _MAIN_LIGHT_SHADOWS                    // main light shadows (receive shadows)
#pragma multi_compile _ _MAIN_LIGHT_SHADOWS_CASCADE            // main light shadow cascades variant (keyword name; not a "cast shadows" switch)
#pragma multi_compile _ _SHADOWS_SOFT                         // soft shadows

/// Returns the main light's realtime shadow value at a world-space position.
/// positionWS: world-space position to test.
float GetShadow(float3 positionWS)
{
  // World position -> shadow coordinate, then the pipeline's main light shadow lookup.
  return MainLightRealtimeShadow(TransformWorldToShadowCoord(positionWS));
}

这里为了简便使用的是urp管线内置的阴影，但这里是做了边缘软化的。为了节约性能，可以仅仅采样硬阴影，不做软化

散射

光线在大气中行进是有可能发生散射的，体积光也不例外。为了简便这里仅仅采用单次散射，且为了节省性能，不再走基于物理的散射模型，仅仅采用相位函数、大气散射密度、透光率
为了进一步优化性能，相位函数采用近似的Henyey-Greenstein相位函数：

$\frac{1 - g^2}{4\pi\,(1 + g^2 - 2g\cos\theta)^{1.5}}$

其中 g = 0 时为各向同性散射，g > 0 时以前向散射为主，g < 0 时以后向散射为主

实现

/// -----------------
/// Amount of light scattered along the view direction (phase function).
/// Evaluates (1 - g^2) / (4 * PI * (1 + g^2 - 2 * g * cosTheta)^1.5) with g = _HGCoff.
/// cosTheta: cosine of the angle between the view direction and the light direction.
/// -----------------
float GetPhase(float cosTheta)
{
  float gSquared = Pow2(_HGCoff);
  float denomBase = 1.f + gSquared - 2.f * _HGCoff * cosTheta;

  return (1.f - gSquared) / (4.f * PI * pow(denomBase, 1.5f));
}

/// -----------------
/// Atmospheric density ratio.
/// Falls off exponentially with _HeightFromSeaLevel: exp(-height / 1200).
/// -----------------
float GetRho()
{
  // Height over which the density ratio drops by a factor of e.
  const float densityFalloffHeight = 1200.f;

  return exp(-_HeightFromSeaLevel / densityFalloffHeight);
}

/// -----------------
/// Final scattering ratio: phase function scaled by the atmospheric density ratio.
/// cosTheta: cosine of the angle between the view direction and the light direction.
/// -----------------
float GetScatter(float cosTheta)
{
  float phase = GetPhase(cosTheta);
  float density = GetRho();

  return phase * density;
}

/// -----------------
/// Transmittance: fraction of light that survives after travelling `distance`.
/// Computed as exp(-distance * _TransmittanceFactor * GetRho()).
/// -----------------
float GetTransmittance(float distance)
{
  // Exponent of the falloff; negated up front so the product order matches exp(-d * k * rho).
  float negativeAttenuation = -distance * _TransmittanceFactor * GetRho();

  return exp(negativeAttenuation);
}

RayMarching

按开头所提的思路进行光线步进即可

/// Ray-marches from viewOrigin along viewDir and accumulates the lit (unshadowed)
/// in-scattered light to produce the light shaft colour for one pixel.
/// viewOrigin:  world-space start of the march.
/// viewDir:     march direction (scaled by the step length to advance the ray).
/// maxDistance: total distance covered by the _StepCount steps.
/// screenPos:   not used by this version of the function.
/// Returns the accumulated light tinted by _MainLightColor and _LightShaftColor.
half3 GetLightShaft(float3 viewOrigin, half3 viewDir, float maxDistance, float2 screenPos)
{
    float stepLength = maxDistance / _StepCount;              // length of one march step
    float3 step = stepLength * viewDir;
    float3 currPos = viewOrigin;
    float3 totalLight = 0.f;
    // Distance travelled so far is a scalar. It used to be declared float3 and was
    // implicitly truncated every time it was passed to GetTransmittance(float).
    float totalDistance = 0.f;

    // The scattering ratio only depends on the view/light angle, so evaluate it once.
    float scatterFun = GetScatter(dot(viewDir, _SunDirection));

    UNITY_LOOP
    for(int i = 0; i < _StepCount; ++i)
    {
        float shadow = GetShadow(currPos);
        if(shadow > 0.f)
        {
            // Light contributed by this sample: shadow value, scattering ratio and
            // the transmittance over the distance marched so far.
            totalLight += _Brightness * shadow * scatterFun * GetTransmittance(totalDistance);
        }

        // Advance to the next sample position.
        currPos += step;
        totalDistance += stepLength;
    }

    half3 result = totalLight * _MainLightColor * _LightShaftColor.rgb * _LightShaftColor.aaa;

    return result;
}

再和原图叠加，不难得到如下结果：

Dual Blur优化块状感

存在的问题：仔细观看上图，虽然有体积光的感觉，但是有明显的硬线，这是因为step的步长大小不够小，得到的结果不够精准（和光追一个道理，弹射次数越多越精准）。但是step步长小了开销又很高，真是头疼怎么办呢？
解决方案：因为体积光属于后处理，要用魔法打败魔法，所以这里可以采用模糊弱化硬线。出于性能考虑，这里使用性能拔尖的Dual Blur

不难得到如下结果：

性能优化

棋盘格Clip

光的变化频率不高，也就是说如果进行部分clip，也不会很容易被识别出来，这里采用棋盘格刷新的方式来更新

实现

很简单，在ps中clip即可

// Checkerboard rejection: keep a pixel only when its x and y pixel indices are both odd
// or both even; clip() discards the pixel when its argument is negative.
float2 channel = floor(i.positionCS.xy);
// Parenthesised explicitly: `a%2 * b%2` parses as `((a%2) * b) % 2`, which only happens
// to give the same result for non-negative integer coordinates.
float bothOdd  = (channel.y % 2) * (channel.x % 2);
float bothEven = ((channel.y + 1) % 2) * ((channel.x + 1) % 2);
clip(bothOdd + bothEven - 0.1f);

Dither

每个pixel每次光线步进的方向都是固定的，因此无法以较低的步进次数达到不错的效果：步进次数较少时，体积光会出现带状伪影，并且变得很糊。可以对每个pixel的步进方向做一下抖动

实现

// Dithered variant of GetLightShaft (excerpt: only the dither-related lines are shown,
// the rest of the body is elided at the "// ..." marker).
half3 GetLightShaft(float3 viewOrigin, half3 viewDir, float maxDistance, float2 screenPos)
{
// Integer pixel coordinate wrapped into a repeating 4x4 tile.
float2 ditherPos = fmod(floor(screenPos.xy), 4.f);
  // Look up the noise value for this tile position; the 0.5/4 term targets the centre of
  // a cell in a 4x4 grid (assumes _BlueNoiseTex is 4x4 texels - TODO confirm).
  // NOTE(review): the third Sample() argument is the texel-offset slot; float2(0, 0) adds no offset.
  float3 ditherDir = _BlueNoiseTex.Sample(Smp_ClampU_ClampV_Linear, ditherPos / 4.f + float2(0.5 / 4.f, 0.5f / 4.f), float2(0, 0));

  // Per-pixel dithered step: the regular step vector scaled component-wise by the noise.
  // stepLength comes from the part of the function that is not shown here.
  float3 step = stepLength * viewDir * ditherDir;
  // ...
}

下图是步进16次的效果：

即使步进仅6次也比先前的好：

降采样

体积光这种比较梦幻的效果，不必使用原分辨率计算，可以先降采样，在低分辨率下计算体积光并模糊，再合并到原图上，即便如此也不容易露馅

但升采样时会用到双线性插值，升采样后的图像会变模糊。单纯的模糊对体积光来说还可以接受（对SSAO则会出现明显伪影），但在物体边缘处，低分辨率的结果会渗到深度不同的像素上，这样得到的体积光效果并不好，需要用到Depth-Aware Upsampling

以下是升采样得到的效果：

以下是升采样但经过Depth-Aware Upsampling后的效果：

Depth-Aware Upsampling
简单来说，该算法的思路是：在一个2x2的核中，分别计算高分辨率图中目标pixel的depth与它在低分辨率图中相邻四个pixel的depth之差，选出depth最接近全分辨率深度的那个样本，并返回该样本对应的低分辨率颜色

实现

首先需要降低分辨率

// Shrink the render target descriptor by the configured down-sample factor and
// store a single 16-bit channel (used here for the low-resolution depth).
_descriptor.width  /= (int)_passSetting.downSample;
_descriptor.height /= (int)_passSetting.downSample;
_descriptor.colorFormat = RenderTextureFormat.R16;

// Allocate the temporary low-resolution depth target with point filtering.
_LowResDepthRT = new RenderTargetIdentifier(_lowResDepthTexID);
cmd.GetTemporaryRT(_lowResDepthTexID, _descriptor, FilterMode.Point);

然后需要一个额外的Pass来计算低分辨率下的depth

/// Pixel shader of the depth-copy pass: reads the camera depth texture at i.uv and
/// outputs it converted with Linear01Depth.
PSOutput CopyDepth(PSInput i)
{
  // Depth lives in the first channel of the sample.
  float rawDepth = _CameraDepthTexture.Sample(Smp_ClampU_ClampV_Linear, i.uv).r;

  PSOutput o;
  o.color = Linear01Depth(rawDepth, _ZBufferParams);
  return o;
}

最后进行Depth-Aware Upsampling

// Depth-aware upsampling: out of four neighbouring low-resolution samples, use the
// light shaft colour of the one whose depth is closest to the full-resolution depth.
half4 result = 0.h;

// Full-resolution depth of the pixel being shaded, converted with Linear01Depth.
float highResDepth = _CameraDepthTexture.Sample(Smp_ClampU_ClampV_Linear, i.uv).r;
highResDepth = Linear01Depth(highResDepth, _ZBufferParams);

// Sample() offsets are whole texels. The previous int2(0, 0.5f)-style offsets truncated
// to (0, 0), so all four taps read the same texel and the comparison had no effect.
// The depth taps use the point sampler so each depth belongs to exactly the texel whose
// colour is fetched (also with the point sampler) in the switch below.
float lowResDepth1 = _LowResDepthTex.Sample(Smp_ClampU_ClampV_Point, i.uv, int2(0, 1)).r;
float lowResDepth2 = _LowResDepthTex.Sample(Smp_ClampU_ClampV_Point, i.uv, int2(0, -1)).r;
float lowResDepth3 = _LowResDepthTex.Sample(Smp_ClampU_ClampV_Point, i.uv, int2(1, 0)).r;
float lowResDepth4 = _LowResDepthTex.Sample(Smp_ClampU_ClampV_Point, i.uv, int2(-1, 0)).r;

float depthDiff1 = abs(highResDepth - lowResDepth1);
float depthDiff2 = abs(highResDepth - lowResDepth2);
float depthDiff3 = abs(highResDepth - lowResDepth3);
float depthDiff4 = abs(highResDepth - lowResDepth4);

// Pick the tap with the smallest depth difference; on ties the first one wins.
float depthDiffMin = min(min(depthDiff1, depthDiff2), min(depthDiff3, depthDiff4));
int index = -1;
if(depthDiffMin == depthDiff1) index = 0;
else if(depthDiffMin == depthDiff2) index = 1;
else if(depthDiffMin == depthDiff3) index = 2;
else if(depthDiffMin == depthDiff4) index = 3;

// Fetch the light shaft colour with the same offset as the winning depth tap.
switch(index)
{
  case 0:
      result += _LightShaftTex.Sample(Smp_ClampU_ClampV_Point, i.uv, int2(0, 1));
      break;
  case 1:
      result += _LightShaftTex.Sample(Smp_ClampU_ClampV_Point, i.uv, int2(0, -1));
      break;
  case 2:
      result += _LightShaftTex.Sample(Smp_ClampU_ClampV_Point, i.uv, int2(1, 0));
      break;
  case 3:
      result += _LightShaftTex.Sample(Smp_ClampU_ClampV_Point, i.uv, int2(-1, 0));
      break;
  default:
      // No tap matched: fall back to the un-offset sample.
      result += _LightShaftTex.Sample(Smp_ClampU_ClampV_Point, i.uv);
      break;
}

// Composite the light shaft on top of the source image.
half4 sourceTex = SAMPLE_TEXTURE2D(_SourceTex, Smp_ClampU_ClampV_Linear, i.uv);
result += sourceTex;

因为采样核的大小比较小，可能导致非边缘产生块状像素，所以可以计算深度差，并判断该深度差是否大于设定的深度阈值，大于则采用点采样，否则双线性插值

在降低四倍分辨率的情况下，得到的体积光效果：

TAA

当降采样过多后，由于分辨率过低，体积光会出现许多明显的像素块和噪点。具体效果如下：

这时可以考虑用TAA进行抖动，并将当前帧和上一帧抖动后的图像进行lerp，从而减少噪点（关于TAA笔者先前的文章也有介绍，这里不再提及）。TAA后，效果如下：

性能耗时

3080的配置下耗时：1k分辨率 + 8次步进 + 8倍降采样 + TAA，总耗时0.557 + 0.108ms

彩色半透明玻璃

当光线穿过半透明物体时，光线会受半透明物体的颜色的影响，体积光也需支持该功能

思路
1. 对于半透明物体，新增一个pass采样它的深度，记作transparent depth
2. 每次步进时，额外新增一个步进，该步进朝光源方向步进，每次步进后将该点的depth positionWS转到uv空间，使用该uv采样transparent depth tex
3. 比较步进后的length(depth positionWS - camera world position)和transparent depth。若大于，则说明该步进点位于半透明物体后，乘上步进前的camera color tex

采样Transparent Depth

// Pass that writes a normalised camera distance for the geometry it renders
// (used for the transparent objects in the light shaft effect). It is selected
// through the "SampleLinear01Depth" LightMode tag.
Pass
{
    Name "Sample Linear 01 Depth For Light Shaft"
    Tags
    {
        "LightMode" = "SampleLinear01Depth"
    }

    HLSLINCLUDE
    #include "Assets/Materials/Common.hlsl"
    ENDHLSL

    HLSLPROGRAM
    #pragma vertex VS
    #pragma fragment SampleLinearDepth

    // Vertex shader: outputs the clip-space position, the world-space position and the UV.
    PSInput VS(VSInput i)
    {
        PSInput o;

        o.posCS = mul(UNITY_MATRIX_MVP, float4(i.posOS, 1.f));
        o.posWS = mul(UNITY_MATRIX_M, float4(i.posOS, 1.f));

        o.uv = i.uv;
        // Flip V when UNITY_UV_STARTS_AT_TOP is defined.
        #if defined (UNITY_UV_STARTS_AT_TOP)
            o.uv.y = 1 - o.uv.y;
        #endif

        return o;
    }

    // Pixel shader: writes the distance from the camera to the surface, divided by
    // _ProjectionParams.z, into the red channel.
    PSOutput SampleLinearDepth(PSInput i)
    {
        PSOutput o;

        // Stored so the ray march can later compare sample distances against it.
        float viewDepth = length(i.posWS - _WorldSpaceCameraPos) / _ProjectionParams.z;
        o.color.r = viewDepth;

        return o;
    }
    ENDHLSL
}

Depth的步进与比较

/// Projects a world-space position to 0-1 screen UV using the camera-space position,
/// unity_CameraProjection._m11 and the aspect ratio _TexParams.x / _TexParams.y.
/// positionWS: world-space position to project.
/// Returns the screen UV, or (-1, -1) when the point is not in front of the camera so
/// that a caller's 0-1 range check rejects it.
float2 TransformWorldToScreen(float3 positionWS)
{
    // Transform as a point (w = 1) so the camera translation is applied. The previous code
    // multiplied the 4x4 matrix by a float3, which cannot apply the translation, and
    // compensated by first scaling the camera-relative offset by the far plane distance.
    real3 toCam = mul(unity_WorldToCamera, float4(positionWS, 1.f)).xyz;
    real camPosZ = toCam.z;

    // The frustum size below is derived from camPosZ, so it is only meaningful for points
    // in front of the camera (positive z, as the formula already assumed). This also avoids
    // dividing by a zero width/height.
    if(camPosZ <= 0)
    {
        return float2(-1.f, -1.f);
    }

    // Height and width of the view frustum cross-section at this depth.
    real height = 2 * camPosZ / unity_CameraProjection._m11;
    real width = _TexParams.x / _TexParams.y * height;

    // Map [-size / 2, size / 2] to [0, 1].
    real2 uv = 0;
    uv.x = (toCam.x + width / 2) / width;
    uv.y = (toCam.y + height / 2) / height;

    return uv;
}

// Variant of GetLightShaft with optional tinting by coloured transparent objects
// (excerpt: the setup code is elided at the "//..." marker and no return statement is shown).
// When _TRANSPARENT_COLOR_ON is defined, every lit sample starts a second march along
// -_SunDirection and multiplies its colour by the camera colour wherever the recorded
// transparent distance is smaller than the sample's own distance to the camera.
half3 GetLightShaft(float3 viewOrigin, half3 viewDir, float maxDistance, float2 screenPos)
{
    //...
    #if defined(_TRANSPARENT_COLOR_ON)
    // Secondary march setup: direction, step length and step vector.
    float3 depthRayDir = -_SunDirection;
    float depthStepLength = _TransparentMaxDistance / _TransparentStepCounts;
    float3 depthStep = depthStepLength * depthRayDir;
    #endif

    UNITY_LOOP
    for(int i = 0; i < _StepCount; ++i)
    {
        float shadow = GetShadow(currPos);

        if(shadow > 0.f)
        {
            // Light contributed by this sample before any transparent tint.
            float3 currColor = _Brightness * shadow * scatterFun * GetTransmittance(totalDistance);

            #if defined(_TRANSPARENT_COLOR_ON)
            // Start of the secondary march, nudged from the current sample by the dither value.
            float3 depthCurrPos = currPos + depthRayDir * ditherDir;
            UNITY_LOOP
            for(int j = 0; j < _TransparentStepCounts; ++j)
            {
                float2 depth_uv = TransformWorldToScreen(depthCurrPos);
                float distanceCameraToDepth = length(depthCurrPos - _WorldSpaceCameraPos);

                // Stop once the secondary ray leaves the 0-1 screen range.
                if(depth_uv.x < 0.f || depth_uv.y < 0.f || depth_uv.x > 1.f || depth_uv.y > 1.f)
                {
                    break;
                }

                // Stored value scaled back by _ProjectionParams.z to a camera distance.
                float transparentDepth = _LinearDepthTex.Sample(Smp_ClampU_ClampV_Linear, depth_uv).r * _ProjectionParams.z;  // length posws to world camera pos

                // The secondary sample lies behind the transparent object.
                if(transparentDepth < distanceCameraToDepth)
                {
                    // NOTE(review): the tint is multiplied in on every secondary step that
                    // passes this test, not just once - confirm this is intended.
                    float4 sourceColor = _CameraColorTexture.Sample(Smp_ClampU_ClampV_Linear, depth_uv) * _TransparentColorIntensity;
                    currColor *= sourceColor;
                }

                depthCurrPos += depthStep;
            }
            #endif

            // Each sample's contribution is clamped to 0-1 before accumulation.
            totalLight += saturate(currColor);
        }

        // Advance the primary march.
        currPos += step;
        totalDistance += stepLength;
    }
}

效果：