{"id":475,"date":"2026-05-18T00:32:01","date_gmt":"2026-05-17T16:32:01","guid":{"rendered":"http:\/\/chenglixue.top\/?p=475"},"modified":"2026-05-18T00:32:01","modified_gmt":"2026-05-17T16:32:01","slug":"%e6%8c%87%e4%bb%a4%e4%bc%98%e5%8c%96-unroll-loop-branch-flatten-%e4%bb%8e%e5%85%a5%e9%97%a8%e5%88%b0%e5%85%a5%e5%9d%9f","status":"publish","type":"post","link":"http:\/\/chenglixue.top\/?p=475","title":{"rendered":"\u6307\u4ee4\u4f18\u5316 unroll loop branch flatten \u4ece\u5165\u95e8\u5230\u5165\u575f"},"content":{"rendered":"<p><div class=\"has-toc have-toc\"><\/div><\/p>\n<h1>\u524d\u8a00<\/h1>\n<p>\u5728\u5b66\u4e60shader\u65f6\uff0c\u7ecf\u5e38\u80fd\u770b\u5230unroll loop branch flatten\u8fd9\u56db\u4e2a\u6307\u4ee4\uff0c\u5b83\u4eec\u53ef\u4ee5\u7528\u4e8e\u4f18\u5316shader\u6027\u80fd<\/p>\n<p>\u4f46\u5b83\u4eec\u662f\u600e\u4e48\u4f18\u5316\u7684\u5462\uff1f\u4ec0\u4e48\u65f6\u5019\u8be5\u7528unroll\/loop\uff1b\u4ec0\u4e48\u65f6\u5019\u8be5\u7528branch\/flatten\uff1f\u8fd9\u662f\u672c\u7bc7\u5c06\u89e3\u7b54\u7684<\/p>\n<h1>unroll &amp; loop<\/h1>\n<ul>\n<li>\u7528\u9014\uff1a\u7528\u4e8efor\u5faa\u73af<\/p>\n<\/li>\n<li>\n<p>\u4f5c\u7528<\/p>\n<ul>\n<li>unroll \uff1a\u5c06for\u5faa\u73af\u5c55\u5f00\uff0c\u76f8\u5f53\u4e8e\u628a\u5faa\u73af\u91cc\u7684\u4ee3\u7801\u590d\u5236 <span class=\"katex math inline\">N<\/span> \u904d\uff0c\u65e0\u9700\u989d\u5916\u8003\u8651<strong>\u5faa\u73af\u63a7\u5236\u548c\u6761\u4ef6\u8df3\u8f6c\u6307\u4ee4<\/strong><\/li>\n<li>loop\uff1a\u5728\u751f\u6210\u7684\u6c47\u7f16\u4ee3\u7801\u4e2d\u4fdd\u7559\u771f\u5b9e\u7684\u5faa\u73af\u7ed3\u6784\uff0c\u5373\u4e0d\u505a\u66f4\u6539\uff0c\u9ed8\u8ba4\u60c5\u51b5\u4e0b\u662floop<\/li>\n<\/ul>\n<\/li>\n<li>\u5b9e\u4f8b\n<pre><code class=\"language-glsl line-numbers\">float sum = 0;\n[loop]\nfor(int i = 0; i &lt; 3; i++)\n{\n  sum += data[i];\n}\n<\/code><\/pre>\n<p>\u4ee5\u4e0a\u4ee3\u7801\u6362\u7b97\u6210\u6c47\u7f16\u4f2a\u4ee3\u7801\u5982\u4e0b\uff1a<\/p>\n<pre><code 
class=\"language-glsl line-numbers\">\/\/ --- 1. \u521d\u59cb\u5316 ---\n  mov r_sum, 0        \/\/ sum = 0\n  mov r_i, 0          \/\/ i = 0\n\nLOOP_START:             \/\/ (\u8fd9\u662f\u4e00\u4e2a\u5730\u5740\u6807\u7b7e\uff0c\u4e0d\u5360\u6307\u4ee4)\n  \/\/ --- 2. \u6761\u4ef6\u8df3\u8f6c\u6307\u4ee4 (\u5faa\u73af\u51fa\u53e3) ---\n  cmp r_i, 3          \/\/ \u6bd4\u8f83 i \u548c 3\n  jge LOOP_END        \/\/ \u3010\u6761\u4ef6\u8df3\u8f6c\u3011\uff1a\u5982\u679c i &gt;= 3 (Jump if Greater or Equal)\uff0c\u4fee\u6539 PC \u6307\u9488\uff0c\u8df3\u5230 LOOP_END\n\n  \/\/ --- 3. \u5faa\u73af\u4f53 ---\n  load r_temp, data, r_i \/\/ \u8bfb\u53d6 data[i]\n  add r_sum, r_sum, r_temp \/\/ sum += temp\n\n  \/\/ --- 4. \u5faa\u73af\u63a7\u5236 (\u6b65\u8fdb\u4e0e\u56de\u73af) ---\n  add r_i, r_i, 1     \/\/ i++\n  jmp LOOP_START      \/\/ \u3010\u65e0\u6761\u4ef6\u8df3\u8f6c\u3011\uff1a\u5f3a\u884c\u4fee\u6539 PC \u6307\u9488\uff0c\u8df3\u56de LOOP_START \u5904\u7ee7\u7eed\u6267\u884c\n\nLOOP_END:               \/\/ (\u5faa\u73af\u7ed3\u675f\uff0c\u7ee7\u7eed\u6267\u884c\u540e\u7eed\u4ee3\u7801)\n  ...\n<\/code><\/pre>\n<p>\u5982\u679c\u5c06loop\u6362\u6210unroll\uff0c\u6c47\u7f16\u4f2a\u4ee3\u7801\u5982\u4e0b\uff1a<\/p>\n<pre><code class=\"language-glsl line-numbers\">load r_temp0, data, 0\nadd r_sum, r_sum, r_temp0\n\nload r_temp1, data, 1\nadd r_sum, r_sum, r_temp1\n\nload r_temp2, data, 2\nadd r_sum, r_sum, r_temp2\n<\/code><\/pre>\n<p>\u4e0d\u96be\u53d1\u73b0\uff0cunroll\u4e0b\uff0c\u6307\u4ee4\u66f4\u5c11\uff0c\u5c24\u5176\u662f\u6ca1\u6709\u8df3\u8f6c\u6307\u4ee4<\/p>\n<p>\u90a3\u5c82\u4e0d\u662f\u65e0\u8111\u7528unroll\u5c31\u597d\uff1funroll\u4e5f\u5e76\u4e0d\u662f\u6ca1\u6709\u7f3a\u70b9\uff0c\u5f53\u5faa\u73af\u6b21\u6570\u8fc7\u591a\uff0c\u5c55\u5f00\u540e\u751f\u6210\u7684 Shader 
\u6c47\u7f16\u4ee3\u7801\u8fc7\u591a\uff0c\u4f1a<strong>\u5bfc\u81f4L1\u6307\u4ee4\u7f13\u5b58\u4e0d\u591f\u7528\uff0c\u4ece\u800c\u9020\u6210\u6307\u4ee4\u83b7\u53d6\u505c\u987f<\/strong><\/p>\n<p>\u4e3a\u4ec0\u4e48\u4f1a\u8fd9\u6837\u5462\uff1floop\u4e0b\uff0c\"sum += data[i];\"\u8fd9\u6761\u6307\u4ee4\u52a0\u8f7d\u8fdbL1\u6307\u4ee4\u7f13\u5b58\uff0c\u540e\u7eed\u4e0d\u65ad\u53cd\u590d\u8bfb\u53d6\u8fd9\u6761\u76f8\u540c\u7684\u6307\u4ee4\uff0c<strong>L1\u547d\u4e2d\u7387\u6781\u9ad8<\/strong>\uff1bunroll\u4e0b\uff0c\u5982\u679c\u5faa\u73af\u6b21\u6570\u6765\u5230100\u6b21\uff0c\u8fd9\u6761\u6307\u4ee4\u5b58\u5728100\u4e2a\uff0c\u4e14\u4e0d\u76f8\u540c\uff0c\u8fd9\u5bfc\u81f4\u8bfb\u53d6\u65f6\uff0c\u82e5\u8d85\u51faL1\u7f13\u5b58\uff0c\u9700\u8981<strong>\u7b49\u5f85GPU\u6254\u6389\u524d\u9762\u7684\u6307\u4ee4\uff0c\u524d\u5f80L2\/L3\/\u5168\u5c40\u5185\u5b58\u62ff\uff08\u770bGPU\u67b6\u6784\uff09\uff0c\u8fd9\u5c31\u5bfc\u81f4\u4e86\u5ef6\u8fdf<\/strong><\/p>\n<\/li>\n<li>\n<p>\u4f18\u7f3a\u70b9<\/p>\n<ul>\n<li>unroll\uff1a<\/p>\n<\/li>\n<li>\n<p><strong>\u6d88\u9664\u5206\u652f\u5f00\u9500<\/strong>\uff1a\u6ca1\u6709\u5faa\u73af\u7684\u6761\u4ef6\u5224\u65ad\u548c\u8df3\u8f6c\u6307\u4ee4<\/p>\n<\/li>\n<li>\n<p><strong>\u6307\u4ee4\u7f13\u5b58\u81a8\u80c0<\/strong>\uff1a\u5c55\u5f00\u540e\u751f\u6210\u7684 Shader \u6c47\u7f16\u4ee3\u7801\u8fc7\u957f\u3002\u5982\u679c\u4ee3\u7801\u8fc7\u5927\uff0c\u8d85\u51fa\u4e86 GPU \u7684 L1 
\u7f13\u5b58\uff0c\u4f1a\u5bfc\u81f4\u6307\u4ee4\u83b7\u53d6\u505c\u987f<\/p>\n<ul>\n<li><strong>\u5168\u5c40\u4f18\u5316\u7a7a\u95f4\u66f4\u5927<\/strong>\uff1a\u7f16\u8bd1\u5668\u53ef\u4ee5\u8de8\u8d8a\u591a\u4e2a\u201c\u8fed\u4ee3\u201d\u8fdb\u884c<strong>\u6307\u4ee4\u91cd\u6392\u3001\u5e38\u91cf\u6298\u53e0\u548c\u4f9d\u8d56\u9690\u85cf<\/strong><\/p>\n<\/li>\n<li>\n<p>\u5e38\u91cf\u6298\u53e0\uff1a\u628a\u8fd0\u884c\u65f6\u7684\u8ba1\u7b97\u63d0\u524d\u5230\u7f16\u8bd1\u671f\u3002\u5982\u679c\u7f16\u8bd1\u5668\u53d1\u73b0\u67d0\u4e9b\u8868\u8fbe\u5f0f\u7684\u7ed3\u679c\u5728\u7f16\u8bd1\u671f\u95f4\u5c31\u80fd\u786e\u5b9a\uff0c\u5b83\u5c31\u4f1a\u76f4\u63a5\u628a\u7ed3\u679c\u7b97\u51fa\u6765\uff0c\u66ff\u6362\u6389\u539f\u6765\u7684\u4ee3\u7801\u3002\u8fd9\u6837 GPU \u5728\u8fd0\u884c\u65f6\u5c31\u4e0d\u7528\u518d\u7b97\u4e00\u904d<\/p>\n<pre><code class=\"language-glsl line-numbers\">float offset = 0;\n[unroll]\nfor(int i = 0; i &lt; 3; i++) \n{\n  offset += data[i] * (i * 2.5); \/\/ i \u4e58\u4ee5\u4e00\u4e2a\u56fa\u5b9a\u7cfb\u6570\n}\n<\/code><\/pre>\n<p>unroll\u540e\u662f\u8fd9\u6837\u7684\uff1a<\/p>\n<pre><code class=\"language-glsl line-numbers\">offset += data[0] * (0 * 2.5);\noffset += data[1] * (1 * 2.5);\noffset += data[2] * (2 * 2.5);\n<\/code><\/pre>\n<p>\u7f16\u8bd1\u5668\u53d1\u73b00 * 2.5\u30011 * 2.5\u30012 * 2.5\uff0c\u8fd9\u4e09\u4e2a\u8868\u8fbe\u5f0f\u5728\u7f16\u8bd1\u671f\u5373\u53ef\u786e\u5b9a\uff0c\u65e0\u9700\u5728\u8fd0\u884c\u671f\u8ba1\u7b97\uff0c\u53ef\u4f18\u5316\u6210\u5982\u4e0b\u7ed3\u679c\uff1a<\/p>\n<pre><code class=\"language-glsl line-numbers\">offset += data[0] * 0.0;\noffset += data[1] * 2.5;\noffset += data[2] * 5.0;\n<\/code><\/pre>\n<\/li>\n<li>\u6307\u4ee4\u91cd\u6392 &amp; 
\u4f9d\u8d56\u9690\u85cf\n<p>\u8fd9\u4e24\u4e2a\u6982\u5ff5\u662f\u7d27\u5bc6\u7ed1\u5b9a\u5728\u4e00\u8d77\u7684\u3002<strong>\u6307\u4ee4\u91cd\u6392\u662f\u201c\u624b\u6bb5\u201d\uff0c\u800c\u4f9d\u8d56\u9690\u85cf\u662f\u201c\u76ee\u7684\u201d<\/strong><\/p>\n<p>\u5728 GPU \u4e2d\uff0c\u4ece\u5185\u5b58\u8bfb\u53d6\u6570\u636e\u662f\u975e\u5e38\u975e\u5e38\u6162\u7684\uff0c\u53ef\u80fd\u9700\u8981\u51e0\u767e\u4e2a\u65f6\u949f\u5468\u671f\u3002\u5982\u679c\u4e0b\u4e00\u884c\u4ee3\u7801\u9700\u8981\u7528\u8fd9\u4e2a\u6570\u636e\u6765\u505a\u52a0\u6cd5\uff0c\u8fd9\u53eb\u505a<strong>\u6570\u636e\u4f9d\u8d56<\/strong>\u3002\u5728\u6570\u636e\u6ca1\u8bfb\u56de\u6765\u4e4b\u524d\uff0c\u52a0\u6cd5\u6307\u4ee4\u53ea\u80fd\u5e72\u7b49\u7740<\/p>\n<pre><code class=\"language-glsl line-numbers\">float sum = 0;\n[loop]\nfor(int i = 0; i &lt; 3; i++) \n{\n  float val = tex.Load(i); \/\/ \u3010\u8017\u65f6\u64cd\u4f5c\u3011\n  sum += val;              \/\/ \u3010\u4f9d\u8d56\u4e0a\u4e00\u884c\u7684\u7ed3\u679c\u3011\n}\n<\/code><\/pre>\n<p>\u5982\u679c\u662floop\uff0c\"sum += val; \"<strong>\u8981\u60f3\u6267\u884c\u5fc5\u987b\u7b49\u5f85tex\u52a0\u8f7d\u5b8c\u6bd5\uff0c\u4e14\u6bcf\u6b21\u5faa\u73af\u90fd\u9700\u8981\u7b49\u5f85<\/strong><\/p>\n<p>\u5982\u679c\u662funroll\uff0c\u4f1a\u5c55\u5f00\u4e09\u884c\"float val = tex.Load(i);\"\uff0c\u7f16\u8bd1\u5668\u53d1\u73b0\u8fd9\u4e09\u884ctex.Load\u53ef\u4ee5\u7edf\u4e00\u5148\u52a0\u8f7d\uff0c\u4e14\u65e0\u9700\u7b49\u5f85\u524d\u9762\u7684\u52a0\u8f7d\u5b8c\u518d\u52a0\u8f7d\u540e\u9762\u7684\uff0c\u65e0\u9650\u63a5\u8fd1\u4e8e\u6d88\u8017\u4e00\u4e2aLoad\u7684\u5468\u671f\uff0c\u7b49\u52a0\u8f7d\u5b8c\u518d\"sum += val\"<\/p>\n<pre><code class=\"language-glsl line-numbers\">1. \u53d1\u8d77\u8bfb\u53d6 tex[0]\n2. \u53d1\u8d77\u8bfb\u53d6 tex[1]  \/\/ \u4e0d\u7b49 tex[0] \u56de\u6765\uff0c\u76f4\u63a5\u53d1\u8bf7\u6c42\uff01\n3. 
\u53d1\u8d77\u8bfb\u53d6 tex[2]  \/\/ \u4e0d\u7b49\u524d\u4e24\u6b21\u56de\u6765\uff0c\u76f4\u63a5\u53d1\u8bf7\u6c42\uff01\n4. ---- \u7b49\u5f85\u6570\u636e\u8fd4\u56de ---- \/\/ \u53ea\u5728\u8fd9\u4e00\u4e2a\u5730\u65b9\u96c6\u4e2d\u7b49\u5f85\uff01\n5. \u62ff\u5230 tex[0]\uff0c\u6267\u884c sum += val0\n6. \u62ff\u5230 tex[1]\uff0c\u6267\u884c sum += val1\n7. \u62ff\u5230 tex[2]\uff0c\u6267\u884c sum += val2\n<\/code><\/pre>\n<\/li>\n<\/ul>\n<\/li>\n<li><strong>\u5bc4\u5b58\u5668\u538b\u529b\u5267\u589e<\/strong>\uff1a\u7f16\u8bd1\u5668\u4e3a\u4e86\u6781\u529b\u9690\u85cf\u5ef6\u8fdf\uff0c\u53ef\u80fd\u4f1a\u5c06\u591a\u6b21\u8fed\u4ee3\u7684\u53d8\u91cf\u540c\u65f6\u52a0\u8f7d\u5230\u5bc4\u5b58\u5668\u4e2d\u3002\u8fd9\u4f1a\u5bfc\u81f4\u5355\u4e2a\u7ebf\u7a0b\u5206\u914d\u7684\u77e2\u91cf\u5bc4\u5b58\u5668\u6570\u91cf\u98d9\u5347\uff0c\u603b\u4f53\u7684wave\u53d8\u5c11<\/p>\n<\/li>\n<li>\n<p>loop\uff1a<\/p>\n<\/li>\n<li><strong>\u6307\u4ee4\u4f53\u79ef\u5c0f\uff0cL1\u7f13\u5b58\u53cb\u597d<\/strong>\uff1aShader \u4e8c\u8fdb\u5236\u6587\u4ef6\u7d27\u51d1\uff0c\u6781\u5927\u5730\u51cf\u5c11\u4e86\u6307\u4ee4\u83b7\u53d6\u7684\u5e26\u5bbd\u538b\u529b<\/li>\n<li><strong>\u4e25\u683c\u63a7\u5236\u5bc4\u5b58\u5668\u751f\u547d\u5468\u671f\uff1a<\/strong> \u6bcf\u6b21\u8fed\u4ee3\u540e\uff0c\u4e34\u65f6\u53d8\u91cf\u7684\u5bc4\u5b58\u5668\u53ef\u4ee5\u88ab\u7acb\u5373\u590d\u7528\uff0c\u8fd9\u5bf9\u4e8e\u7ef4\u6301\u9ad8\u6570\u91cfwave\u81f3\u5173\u91cd\u8981<\/li>\n<li><strong>\u5206\u652f\u53d1\u6563\u98ce\u9669<\/strong>\uff1a \u5982\u679c\u540c\u4e00\u4e2a Wavefront\/Warp \u4e2d\u7684\u4e0d\u540c\u7ebf\u7a0b\u5728\u4e0d\u540c\u7684\u8fed\u4ee3\u6b21\u6570\u65f6\u9000\u51fa\u5faa\u73af\uff0c\u8fd8\u5728\u6267\u884c\u7684\u7ebf\u7a0b\u4f1a\u62d6\u6162\u6574\u4e2a 
Wave<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u5982\u4f55\u9009\u62e9unroll\u3001loop<\/p>\n<p>\u5f53\u6ee1\u8db3\u4ee5\u4e0b\u6761\u4ef6\u65f6\uff0c\u4f7f\u7528<strong>unroll<\/strong><\/p>\n<ul>\n<li><strong>\u56fa\u5b9a\u4e14\u8f83\u5c0f\u7684\u8fed\u4ee3\u6b21\u6570<\/strong><\/li>\n<li><strong>\u5faa\u73af\u5185\u90e8\u7d22\u5f15\u8981\u6c42\u5fc5\u987b\u662f\u7f16\u8bd1\u671f\u5e38\u91cf<\/strong><\/li>\n<li><strong>\u4e0d\u5b58\u5728 Early Exit<\/strong><\/li>\n<\/ul>\n<p>\u5f53\u6ee1\u8db3\u4ee5\u4e0b\u6761\u4ef6\u65f6\uff0c\u4f7f\u7528loop<\/p>\n<ul>\n<li>\u52a8\u6001\u4e14\u53ef\u80fd\u5f88\u9ad8\u7684\u8fed\u4ee3\u6b21\u6570<\/p>\n<\/li>\n<li>\n<p>\u4f9d\u8d56\u52a8\u6001\u6570\u636e\u7684 Early Exit<\/p>\n<\/li>\n<li>\n<p>\u8de8\u5e73\u53f0\u6027\u80fd\u8c03\u4f18<\/p>\n<p>\u5728\u79fb\u52a8\u7aef\u7684 TBDR \u67b6\u6784\u4e0b\uff0c\u5bc4\u5b58\u5668\u5806\u6bd4 PC \u684c\u9762\u7ea7 GPU \u66f4\u52a0\u53d7\u9650\uff0cContext Switch\u7684\u6210\u672c\u6781\u9ad8\u3002\u5bf9\u4e8e\u4e2d\u7b49\u89c4\u6a21\u7684\u5faa\u73af\uff0c\u5f3a\u5236 [loop] \u5f80\u5f80\u80fd\u907f\u514d\u56e0\u5bc4\u5b58\u5668\u6ea2\u51fa\u5bfc\u81f4\u7684\u6389\u5e27<\/p>\n<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u7ecf\u9a8c\u6cd5\u5219<\/p>\n<ul>\n<li>\u6c38\u8fdc\u4e3a Early Exit\uff08\u63d0\u524d\u8df3\u51fa\uff09\u4f7f\u7528 <code>[loop]<\/code><\/li>\n<li>\u9ad8\u5bc4\u5b58\u5668\u538b\u529b\uff08VGPR\uff09\u7684\u590d\u6742\u8ba1\u7b97\uff0c\u4f7f\u7528 <code>[loop]<\/code><\/li>\n<li>\u5fae\u5c0f\u4e14\u56fa\u5b9a\u7684\u8fed\u4ee3\uff0c\u4f7f\u7528 <code>[unroll]<\/code><\/li>\n<li>\u8d44\u6e90\u6570\u7ec4\u7684\u52a8\u6001\u7d22\u5f15\uff0c\u4f7f\u7528<code>[unroll]<\/code><\/li>\n<li>\u5982\u679cShader \u7a81\u7136\u51fa\u73b0\u4e25\u91cd\u7684\u6027\u80fd\u4e0b\u964d\uff0c\u6216\u5728 Nsight \u4e2d\u770b\u5230 Occupancy \u6781\u4f4e\uff0c\u9996\u5148\u53bb\u68c0\u67e5\u90a3\u4e9b\u88ab\u9690\u5f0f\u5c55\u5f00\u7684\u5faa\u73af\u3002\u901a\u8fc7\u624b\u52a8\u6dfb\u52a0 
<code>[loop]<\/code> \u5f80\u5f80\u80fd\u77ac\u95f4\u91ca\u653e\u5bc4\u5b58\u5668\u538b\u529b<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<h1>branch &amp; flatten<\/h1>\n<ul>\n<li>\u7528\u9014\uff1a\u7528\u4e8eif-else<\/li>\n<li>\u4f5c\u7528\n<ul>\n<li>branch\uff1a\u6307\u793a\u7f16\u8bd1\u5668\u4fdd\u7559\u771f\u6b63\u7684\u6761\u4ef6\u8df3\u8f6c\u6307\u4ee4\u3002GPU \u4f1a\u5728\u8fd0\u884c\u65f6\u8bc4\u4f30\u6761\u4ef6\uff0c\u5e76\u51b3\u5b9a\u8df3\u8f6c\u5230 <code>if<\/code> \u5757\u8fd8\u662f <code>else<\/code> \u5757<\/li>\n<li>flatten\uff1a\u6307\u793a\u7f16\u8bd1\u5668\u6d88\u9664\u5206\u652f\u8df3\u8f6c\u6307\u4ee4\u3002 GPU <strong>\u65e0\u6761\u4ef6\u5730\u628a <code>if<\/code> \u548c <code>else<\/code> \u91cc\u7684\u4ee3\u7801\u5168\u90e8\u6267\u884c\u4e00\u904d<\/strong><\/li>\n<\/ul>\n<\/li>\n<li>\u5b9e\u4f8b\n<ul>\n<li>\u6602\u8d35\u7684\u8ba1\u7b97<\/li>\n<\/ul>\n<pre><code class=\"language-glsl line-numbers\">float3 finalColor = ambientColor;\nfloat dist = distance(worldPos, lightPos);\n\n\/\/ \u5982\u679c\u50cf\u7d20\u5728\u5149\u6e90\u534a\u5f84\u5185\uff0c\u5219\u8ba1\u7b97\u590d\u6742\u5149\u7167\uff1b\u5426\u5219\u4ec0\u4e48\u90fd\u4e0d\u505a\u3002\n[branch] \nif (dist &lt; lightRadius) {\n    \/\/ \u3010\u6781\u5176\u6602\u8d35\u7684\u5f00\u9500\u3011\n    \/\/ 1. \u591a\u6b21\u91c7\u6837\u9634\u5f71\u8d34\u56fe (PCF \/ PCSS)\n    float shadow = CalculateHighQualityShadow(worldPos); \n    \/\/ 2. \u590d\u6742\u7684 PBR \u7269\u7406\u5149\u7167\u6a21\u578b\u8ba1\u7b97\n    float3 diffuse = ... 
\n    float3 specular = ...\n\n    finalColor += (diffuse + specular) * shadow;\n}\nreturn finalColor;\n<\/code><\/pre>\n<p><code>if<\/code> \u91cc\u9762\u6709\u6210\u767e\u4e0a\u5343\u4e2a\u5468\u671f\u7684\u7b97\u672f\u548c\u5185\u5b58\u8bfb\u53d6\uff0c\u800c <code>else<\/code>\uff08\u6761\u4ef6\u4e0d\u6210\u7acb\uff09\u4ec0\u4e48\u90fd\u4e0d\u7528\u505a\uff0c\u5f00\u9500\u4e3a0\u3002\u5c4f\u5e55\u4e0a\u8ddd\u79bb\u5149\u6e90\u8fdc\u7684\u50cf\u7d20\uff0c\u5f80\u5f80\u662f\u8fde\u6210\u4e00\u7247\u7684\uff0c GPU \u76f4\u63a5\u8df3\u8fc7\u90a3\u6bb5\u6602\u8d35\u7684\u4ee3\u7801\uff0c<strong>\u77ac\u95f4\u7701\u4e0b\u5de8\u91cf\u7b97\u529b\u548c\u663e\u5b58\u5e26\u5bbd<\/strong><\/p>\n<p>\u5982\u679c\u8fd9\u91cc\u7528flatten\uff0cGPU\u4f1a\u5f3a\u5236\u8ba9\u6ca1\u5728\u5149\u6e90\u534a\u5f84\u7684pixel\u8dd1if\u5206\u652f\uff0c\u5bfc\u81f4\u5f00\u9500\u66b4\u6da8<\/p>\n<ul>\n<li>\u5ec9\u4ef7\u7684\u8ba1\u7b97<\/li>\n<\/ul>\n<pre><code class=\"language-glsl line-numbers\">float3 specularColor;\nfloat isMetallic = MetallicMaskTexture.Sample(sampler, uv).r; \/\/ \u53ef\u80fd\u662f\u9ad8\u9891\u566a\u70b9\u6216\u7c97\u7cd9\u8fb9\u7f18\n\n\/\/\u7b80\u5355\u7684\u4e8c\u9009\u4e00\u8d4b\u503c\n[flatten]\nif (isMetallic &gt; 0.5) {\n    \/\/ \u3010\u6781\u5176\u5ec9\u4ef7\u7684\u5f00\u9500\u3011\n    specularColor = albedoColor; \/\/ \u91d1\u5c5e\u7684\u9ad8\u5149\u989c\u8272\u7b49\u4e8e\u5b83\u7684\u57fa\u7840\u8272\n} else {\n    \/\/ \u3010\u6781\u5176\u5ec9\u4ef7\u7684\u5f00\u9500\u3011\n    specularColor = float3(0.04, 0.04, 0.04); \/\/ \u975e\u91d1\u5c5e\u7684\u9ad8\u5149\u989c\u8272\u662f\u56fa\u5b9a\u7684 4% (0.04)\n}\n<\/code><\/pre>\n<p><code>if<\/code> \u548c <code>else<\/code> \u91cc\u9762\u90fd\u53ea\u6709\u4e00\u6b21\u6700\u57fa\u7840\u7684\u5bc4\u5b58\u5668\u8d4b\u503c\u64cd\u4f5c\uff0c\u54ea\u6015\u8ba9 GPU \u5168\u7b97\u4e00\u904d\uff0c\u4e5f\u53ea\u9700\u8981 1~2 
\u4e2a\u65f6\u949f\u5468\u671f\uff0c\u4f46\u73b0\u5728\u7684\u6e38\u620f\u8d44\u4ea7\u6781\u5176\u7cbe\u7ec6\uff0c\u6750\u8d28\u7684\u8fb9\u7f18\u5f80\u5f80\u662f\u9ad8\u9891\u7684\uff0c\u8fd9\u5c31\u5bfc\u81f4 <code>isMetallic &gt; 0.5<\/code> \u8fd9\u4e2a\u6761\u4ef6\u5728\u4e00\u4e2a\u7ebf\u7a0b\u7ec4\u91cc\uff0c\u5927\u6982\u7387\u662f\u4e00\u534a True \u4e00\u534a False<\/p>\n<p>\u5982\u679c\u7528 <code>[branch]<\/code>\uff0cGPU \u4e3a\u4e86\u8fd9 1 \u4e2a\u5468\u671f\u7684\u8d4b\u503c\uff0c\u9700\u8981\u82b1\u8d39\u5341\u51e0\u4e2a\u5468\u671f\u53bb\u5efa\u7acb\u5206\u652f\u8df3\u8f6c\u903b\u8f91\u548c\u7ebf\u7a0b\u63a9\u7801\uff08\u7528\u4e8e\u89e3\u51b3\u5206\u652f\u53d1\u6563\uff09<\/p>\n<p><strong>\u5206\u652f\u8df3\u8f6c\u903b\u8f91\u548c\u7ebf\u7a0b\u63a9\u7801<\/strong>\u5927\u81f4\u7406\u89e3\u5982\u4e0b\uff1a<\/p>\n<pre><code class=\"language-glsl line-numbers\">[branch]\nif (x &gt; 0) {\n    a = 1; \/\/ \u4ec5\u9700 1 \u4e2a\u5468\u671f\n} else {\n    a = 2; \/\/ \u4ec5\u9700 1 \u4e2a\u5468\u671f\n}\n<\/code><\/pre>\n<p>\u4ee5\u4ee5\u4e0a\u4ee3\u7801\u4e3a\u4f8b\uff0c\u540c\u4e00\u4e2a Warp \u7684 32 \u4e2a\u7ebf\u7a0b\u4e2d\uff0c\u6709 16 \u4e2a\u7ebf\u7a0b\u7684 <code>x &gt; 0<\/code>\uff08\u60f3\u8d70 <code>if<\/code>\uff09\uff0c\u53e6\u5916 16 \u4e2a\u7ebf\u7a0b\u7684 <code>x &lt;= 0<\/code>\uff08\u60f3\u8d70 <code>else<\/code>\uff09\uff0c\u5bfc\u81f4\u4e86\u5206\u652f\u53d1\u6563\uff0c\u4f46<strong>GPU\u662f\u5e76\u884c\u7684\uff0c\u540c\u4e00\u5468\u671f\u5185\uff0c\u6240\u6709\u7ebf\u7a0b\u90fd\u6267\u884c\u76f8\u540c\u7684\u6307\u4ee4<\/strong>\u3002\u4e3a\u6b64\uff0cGPU\u53ea\u80fd\u7528<strong>\u65f6\u95f4\u6362\u7a7a\u95f4<\/strong>\uff0c\u5b83\u5fc5\u987b\u8ba9\u8fd9 32 \u4e2a\u7ebf\u7a0b\u628a <code>if<\/code> \u548c <code>else<\/code> 
\u4e24\u6761\u8def<strong>\u4f9d\u6b21<\/strong>\u90fd\u8d70\u4e00\u904d\u3002\u4e3a\u4e86\u4fdd\u8bc1\u903b\u8f91\u6b63\u786e\uff0c\u5b83\u5f15\u5165\u4e86<strong>\u7ebf\u7a0b\u63a9\u7801<\/strong>\u3002\u5927\u81f4\u6d41\u7a0b\u5982\u4e0b\uff1a<\/p>\n<ul>\n<li>\u8bc4\u4f30\u6761\u4ef6\u4e0e\u751f\u6210\u63a9\u7801\n<p>\u6307\u4ee4\u63a7\u5236\u5668\u9996\u5148\u8ba9\u6240\u6709 32 \u4e2a\u7ebf\u7a0b\u8bc4\u4f30 <code>x &gt; 0<\/code>\u3002\u786c\u4ef6\u4f1a\u751f\u6210\u4e00\u4e2a 32 \u4f4d\u7684\u5e03\u5c14\u63a9\u7801\uff08Active Mask\uff09\uff0c\u6bd4\u5982 <code>11110000...<\/code>\uff081 \u4ee3\u8868 <code>true<\/code>\uff0c0 \u4ee3\u8868 <code>false<\/code>\uff09<\/p>\n<\/li>\n<li>\n<p>\u538b\u6808\u4e0e\u8df3\u8f6c\u903b\u8f91\uff08\u5f00\u9500\u5927\u5934\uff09<\/p>\n<p>\u786c\u4ef6\u53d1\u73b0\u51fa\u73b0\u4e86\u5206\u6b67\uff0c\u5b83\u4e0d\u80fd\u76f4\u63a5\u5f80\u4e0b\u8dd1\u3002\u5b83\u5fc5\u987b\u628a\u5f53\u524d\u7684\u63a9\u7801\u72b6\u6001\u3001\u7a0b\u5e8f\u8ba1\u6570\u5668\u7b49\u4e0a\u4e0b\u6587\u4fe1\u606f\uff0c\u538b\u5165\u786c\u4ef6\u5185\u90e8\u7684\u63a7\u5236\u6d41\u6808\u3002\u8fd9\u5c31\u662f\u6240\u8c13\u7684\u201c<strong>\u5efa\u7acb\u5206\u652f\u8df3\u8f6c\u903b\u8f91<\/strong>\u201d<\/p>\n<\/li>\n<li>\n<p>\u6267\u884c <code>if<\/code> \u5757<\/p>\n<p>\u786c\u4ef6\u4f1a\u6839\u636e\u63a9\u7801 <code>11110000...<\/code> \u5173\u95ed\u6389\u540e\u9762 16 \u4e2a ALU \u7684\u5199\u56de\u6743\u9650\u3002\u90a3 16 \u4e2a\u8d70 <code>else<\/code> \u7684\u7ebf\u7a0b\u6b64\u65f6\u5728<strong>\u7a7a\u8f6c<\/strong>\uff0c\u767d\u767d\u6d6a\u8d39 1 \u4e2a\u5468\u671f<\/p>\n<\/li>\n<li>\n<p>\u53cd\u8f6c\u63a9\u7801\u4e0e\u5f39\u51fa\u6808<\/p>\n<p>if\u5757\u8d70\u5b8c\u4e86\uff0c\u786c\u4ef6\u9700\u8981\u4ece\u63a7\u5236\u6d41\u6808\u4e2d\u5f39\u51fa\u4e4b\u524d\u7684\u72b6\u6001\uff0c\u5e76\u5c06\u63a9\u7801\u53cd\u8f6c\u4e3a 00001111...<\/p>\n<\/li>\n<li>\n<p>\u6267\u884c <code>else<\/code> \u5757<\/p>\n<p>\u524d 16 
\u4e2a\u7ebf\u7a0b\u53d1\u5446\uff0c\u540e 16 \u4e2a\u7ebf\u7a0b\u6267\u884c<\/p>\n<\/li>\n<li>\n<p>\u91cd\u65b0\u6c47\u5408<\/p>\n<p>\u6e05\u9664\u63a9\u7801\uff0c32 \u4e2a\u7ebf\u7a0b\u91cd\u65b0\u6c47\u5408<\/p>\n<\/li>\n<\/ul>\n<p>\u4e0d\u96be\u53d1\u73b0\uff0c<strong>\u5206\u652f\u53d1\u6563\u9700\u8981\u6267\u884c\u591a\u6b21\u6bd4\u8f83\u3001\u4f4d\u63a9\u7801\u64cd\u4f5c\u3001\u5bc4\u5b58\u5668\u8bfb\u5199\u548c\u6307\u4ee4\u5730\u5740\u8ba1\u7b97\uff0c\u8fd9\u4e9b\u51c6\u5907\u5de5\u4f5c\u52a0\u8d77\u6765\uff0c\u901a\u5e38\u9700\u8981\u82b1\u8d39<\/strong> <strong>10 \u5230 20 \u4e2a\u5468\u671f<\/strong><\/p>\n<\/li>\n<li>\n<p>\u4f18\u7f3a\u70b9<\/p>\n<ul>\n<li>branch<\/li>\n<li><strong>\u907f\u514d\u6781\u5176\u6602\u8d35\u7684\u5f00\u9500<\/strong><\/li>\n<li><strong>\u63d0\u524d\u8df3\u51fa<\/strong><\/li>\n<li><strong>\u5206\u652f\u53d1\u6563\u60e9\u7f5a<\/strong><\/li>\n<li><strong>\u7eb9\u7406\u91c7\u6837\u53d7\u9650<\/strong>\uff1a\u5728\u5206\u652f\u5185\u90e8\u65e0\u6cd5\u76f4\u63a5\u4f7f\u7528\u5e26\u6709\u81ea\u52a8 Mipmap \u5c42\u7ea7\u8ba1\u7b97\u7684\u7eb9\u7406\u91c7\u6837\u6307\u4ee4\u3002\u56e0\u4e3a<strong>\u8ba1\u7b97 Mipmap 
\u9700\u8981\u76f8\u90bb\u50cf\u7d20\u7684\u504f\u5bfc\u6570\uff0c\u800c\u5206\u652f\u53d1\u6563\u4f1a\u5bfc\u81f4\u76f8\u90bb\u50cf\u7d20\u53ef\u80fd\u5904\u4e8e\u975e\u6fc0\u6d3b\u72b6\u6001<\/strong><\/li>\n<li><strong>\u989d\u5916\u7684\u63a7\u5236\u6d41\u5f00\u9500<\/strong>\uff1a\u786c\u4ef6\u9700\u8981\u989d\u5916\u7684\u65f6\u949f\u5468\u671f\u53bb\u6267\u884c\u6761\u4ef6\u5224\u65ad\u3001\u8df3\u8f6c\u6307\u4ee4\u4ee5\u53ca\u7ba1\u7406\u7ebf\u7a0b\u63a9\u7801<\/li>\n<li>flatten<\/li>\n<li><strong>\u7edd\u5bf9\u6ca1\u6709\u5206\u652f\u53d1\u6563<\/strong><\/li>\n<li>\u5bf9\u7f16\u8bd1\u5668\u6781\u5176\u53cb\u597d\uff1a\u6ca1\u6709\u4e86\u8df3\u6765\u8df3\u53bb\u7684\u903b\u8f91\uff0c\u7f16\u8bd1\u5668\u53ef\u4ee5\u7eb5\u89c2\u5168\u5c40\uff0c\u8fdb\u884c<strong>\u6307\u4ee4\u91cd\u6392\u548c\u5ef6\u8fdf\u9690\u85cf<\/strong><\/li>\n<li><strong>\u652f\u6301\u6807\u51c6\u7eb9\u7406\u91c7\u6837<\/strong>\uff1a\u652f\u6301Sample()<\/li>\n<li><strong>\u65e0\u8111\u6d6a\u8d39\u7b97\u529b<\/strong><\/li>\n<\/ul>\n<\/li>\n<li>\u7ecf\u9a8c\u6cd5\u5219\n<ul>\n<li>\u5982\u679c\u4e00\u4fa7\u662f<strong>\u201c\u6838\u5f39\u7ea7\u201d\u5f00\u9500<\/strong>\uff0c\u4f7f\u7528 <code>[branch]<\/code><\/p>\n<\/li>\n<li>\n<p>\u5982\u679c\u4e24\u8fb9\u90fd\u662f<strong>\u5ec9\u4ef7<\/strong>\u7684 ALU \u7b97\u672f\u9898\uff0c\u4f7f\u7528 <code>[flatten]<\/code><\/p>\n<\/li>\n<li>\n<p><strong>\u770b\u5c4f\u5e55\u7a7a\u95f4\u7684\u4e00\u81f4\u6027<\/strong>\uff1a\u6761\u4ef6\u5728 8x8 \u7684\u50cf\u7d20\u5757\u5185\u662f\u5426\u4e00\u6837<\/p>\n<\/li>\n<\/ul>\n<p>\u6bd4\u5982 CSM\u7684\u5c42\u7ea7\u9009\u62e9\uff0c\u4e00\u4e2a 8x8 \u7684\u50cf\u7d20\u5757\u6781\u5927\u6982\u7387\u90fd\u843d\u5728\u540c\u4e00\u4e2a Shadow Cascade<\/p>\n<p>\u800cCheckerboard\u3001Dither Noise\u9608\u503c\u5254\u9664\uff0c\u76f8\u90bb\u50cf\u7d20\u6761\u4ef6\u7ed3\u679c\u5267\u70c8\u6ce2\u52a8\uff0c<code>[branch]<\/code> 
\u5fc5\u7136\u53d1\u6563<\/p>\n<ul>\n<li>\u7eb9\u7406\u91c7\u6837\u7684\u786c\u6027\u9650\u5236\uff1aif\u91cc\u5305\u542bSample\uff08\uff09\uff0c\u4f7f\u7528 <code>[flatten]<\/code><\/li>\n<\/ul>\n<\/li>\n<\/ul>\n","protected":false},"excerpt":{"rendered":"<p>\u524d\u8a00 \u5728\u5b66\u4e60shader\u65f6\uff0c\u7ecf\u5e38\u80fd\u770b\u5230unroll loop branch flatten\u8fd9\u56db\u4e2a\u6307\u4ee4\uff0c\u5b83\u4eec\u53ef\u4ee5\u7528\u4e8e\u4f18\u5316shader &#8230;<\/p>","protected":false},"author":1,"featured_media":447,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"_links":{"self":[{"href":"http:\/\/chenglixue.top\/index.php?rest_route=\/wp\/v2\/posts\/475"}],"collection":[{"href":"http:\/\/chenglixue.top\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/chenglixue.top\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/chenglixue.top\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/chenglixue.top\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=475"}],"version-history":[{"count":1,"href":"http:\/\/chenglixue.top\/index.php?rest_route=\/wp\/v2\/posts\/475\/revisions"}],"predecessor-version":[{"id":476,"href":"http:\/\/chenglixue.top\/index.php?rest_route=\/wp\/v2\/posts\/475\/revisions\/476"}],"wp:featuredmedia":[{"embeddable":true,"href":"http:\/\/chenglixue.top\/index.php?rest_route=\/wp\/v2\/media\/447"}],"wp:attachment":[{"href":"http:\/\/chenglixue.top\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=475"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/chenglixue.top\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=475"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/chenglixue.top\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=475"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}