From df35fd0f58c1e4d8836420fd0b828455cebbb166 Mon Sep 17 00:00:00 2001 From: robi29 Date: Sat, 26 Aug 2023 22:35:00 +0200 Subject: [PATCH] [NV8] Ported directional shadows filtering from patch 1040 (thanks to Parallellines) 6\n --- README.md | 3 +- win32_30_nv8/gta_default.fxc.xml | 28 ++++- win32_30_nv8/gta_default/gta_defaultPS1.asm | 122 ++++++++----------- win32_30_nv8/gta_default/gta_defaultPS10.asm | 122 ++++++++----------- win32_30_nv8/gta_default/gta_defaultPS11.asm | 122 ++++++++----------- win32_30_nv8/gta_default/gta_defaultPS12.asm | 122 ++++++++----------- 6 files changed, 237 insertions(+), 282 deletions(-) diff --git a/README.md b/README.md index f5f18c9..8716ef0 100644 --- a/README.md +++ b/README.md @@ -81,9 +81,8 @@ Decompiled all GTA IV shaders and provided numerous fixes and improvements for t - gta_default - Reflection draw distance increased - Increased amount of lighting at very acute angles - - Increased shadows sharpness - Fixed disconnected directional shadows (thanks to Parallellines) - - Improved directional shadows filtering (thanks to RaphaelK12) + - Ported directional shadows filtering from patch 1040 (thanks to Parallellines) - gta_diffuse_instance - Increased shadows sharpness diff --git a/win32_30_nv8/gta_default.fxc.xml b/win32_30_nv8/gta_default.fxc.xml index 9a474c1..6bef564 100644 --- a/win32_30_nv8/gta_default.fxc.xml +++ b/win32_30_nv8/gta_default.fxc.xml @@ -432,7 +432,7 @@ asm gta_default\gta_defaultPS1.asm - + sampler StippleTexture @@ -653,6 +653,11 @@ globalScalars + + float4 + globalScreenSize + + @@ -875,7 +880,7 @@ asm gta_default\gta_defaultPS10.asm - + sampler StippleTexture @@ -976,12 +981,17 @@ globalScalars + + float4 + globalScreenSize + + asm gta_default\gta_defaultPS11.asm - + sampler StippleTexture @@ -1142,12 +1152,17 @@ globalScalars + + float4 + globalScreenSize + + asm gta_default\gta_defaultPS12.asm - + sampler StippleTexture @@ -1348,6 +1363,11 @@ globalScalars + + float4 + globalScreenSize + + diff 
--git a/win32_30_nv8/gta_default/gta_defaultPS1.asm b/win32_30_nv8/gta_default/gta_defaultPS1.asm index ddb7e38..27b4332 100644 --- a/win32_30_nv8/gta_default/gta_defaultPS1.asm +++ b/win32_30_nv8/gta_default/gta_defaultPS1.asm @@ -47,6 +47,7 @@ // float4 globalFogColorN; // float4 globalFogParams; // float4 globalScalars; +// float4 globalScreenSize; // // // Registers: @@ -80,6 +81,7 @@ // globalFogParams c41 1 // globalFogColor c42 1 // globalFogColorN c43 1 +// globalScreenSize c44 1 // gShadowParam18192021 c53 1 // gFacetCentre c54 1 // gShadowParam14151617 c56 1 @@ -101,18 +103,18 @@ ps_3_0 def c0, -0.5, 0.5, 0.9, 1.5 - def c1, 0.0833333358, -0.100000001, 1.11111116, 1.00000001e-007 + def c1, 0.25, -0.100000001, 1.11111116, 1.00000001e-007 def c2, 0, -1, -0, 9.99999975e-006 def c3, 3.99600005, 4, 0.125, 0.25 def c4, 0.212500006, 0.715399981, 0.0720999986, 0 def c5, 1, -1, 0, -0 def c6, 0, 0, 0, 0.5 - def c7, 0.2703204087, 0.4206267664, 0.4548159977, 0.2077075065 - def c8, 0.4949107209, -0.07115741914, 0.3778747872, -0.327430367 - def c9, 0.1408662784, -0.4797464868, -0.1408662784, -0.4797464868 - def c10, -0.3778747872, -0.327430367, -0.4949107209, -0.07115741914 - def c11, -0.4548159977, 0.2077075065, -0.2703204087, 0.4206267664 - def c98, 0.5, 2, 0.1, 0 + def c7, -0.25, 1, -1, -0.07 + def c8, 0.159154937, 0.5, 6.28318548, -3.14159274 + def c9, 3, 7.13800001, 0.00012207031, 0.00048828125 + def c10, 0.75, -0.5, 0.5, 0 + def c11, 0.25, 0.5, 0.75, 4.8 + def c12, 0.5, 0.25, 0.125, 1 dcl_texcoord v0.xy dcl_texcoord1 v1 dcl_color v2.xw @@ -167,68 +169,50 @@ mul r3.w, r2.w, r3.w mul r3.w, r3.w, r3.w mul r3.w, r3.w, c0.w - mov r5.y, c53.y - mul r7.xy, r5.yy, c98.xy - add r4.z, r4.z, -c98.z - mad r5.xz, r7.xy, c11.xyyw, r4.xyyw - texld r6, r5.xzzw, s15 - add r4.w, r4.z, -r6.x - cmp r4.w, r4.w, -c2.y, -c2.z - mad r5.xz, r7.xy, c11.zyww, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c2.y, -c2.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, 
c10.xyyw, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c2.y, -c2.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c10.zyww, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c2.y, -c2.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c9.xyyw, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c2.y, -c2.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c9.zyww, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c2.y, -c2.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c8.xyyw, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c2.y, -c2.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c8.zyww, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c2.y, -c2.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c7.xyyw, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c2.y, -c2.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c7.zyww, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c2.y, -c2.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c6.xyyw, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c2.y, -c2.z - add r4.w, r4.w, r5.x - mad r4.xy, r7.xy, c6.zwzw, r4 - texld r5, r4, s15 - add r4.x, r4.z, -r5.x - cmp r4.x, r4.x, -c2.y, -c2.z - add r4.x, r4.w, r4.x + + add r21.z, r4.z, c7.w // depth bias + + mov r21.xy, c53.xy + max r21.xy, r21.xy, c9.zw // prevents overly sharp shadows when using ShadowResFix + mul r21.xy, r21.xy, c11.ww // *4.8 (c11.w); NOTE(review): comment originally read "*2.4 instead of *3 because CSM resolutions are multiples of 256 instead of 320" - confirm whether 2.4 or 4.8 is intended + + add r27.xyz, r4.x, -c11.xyz + cmp r27.w, r27.x, c12.x, c12.w // cascade 1-2 + cmp r27.w, r27.y, c12.y, r27.w // cascade 2-3 + cmp r27.w, r27.z, c12.z, r27.w // cascade 3-4 + mul r21.xy, r21.xy, r27.w // texel size multiplier + + mul r23.xy, c44.zw, vPos + mov r22.xy, c9.xy + mul r22.xy, r22.xy, c44.xy // r22.xy * screen dimensions (c44.xy) + dp2add r22.y, r23, r22, c5.z
// r23.x * r22.x + r23.y * r22.y + mad r22.y, r22.y, c8.x, c8.y + frc r22.y, r22.y + mad r22.y, r22.y, c8.z, c8.w // r22.y * 2pi - pi + sincos r23.xy, r22.y // sine & cosine of r22.y + mul r24, r23.yxxy, c7.xxyz + mul r23, r23.yxxy, c10.xxyz + + mad r25.xy, r24.xy, r21.xy, r4.xy // offset * texel size + UV + texld r25, r25, s15 // sample #1 + mov r26.x, r25.x // copy to r26.x + + mad r25.xy, r24.zw, r21.xy, r4.xy // offset * texel size + UV + texld r25, r25, s15 // sample #2 + mov r26.y, r25.x // copy to r26.y + + mad r25.xy, r23.xy, r21.xy, r4.xy // offset * texel size + UV + texld r25, r25, s15 // sample #3 + mov r26.z, r25.x // copy to r26.z + + mad r25.xy, r23.zw, r21.xy, r4.xy // offset * texel size + UV + texld r25, r25, s15 // sample #4 + mov r26.w, r25.x // copy to r26.w + + add r26, r21.z, -r26 + cmp r26, r26, c5.x, c5.z // depth test: 1 where r21.z >= sample depth, else 0 + dp4 r4.x, r26, c5.x // sum + mad r3.w, r4.x, c1.x, r3.w add r2.w, r2.w, -c53.w cmp r4.xy, r2.w, c5, c5.zwzw diff --git a/win32_30_nv8/gta_default/gta_defaultPS10.asm b/win32_30_nv8/gta_default/gta_defaultPS10.asm index 997aee9..fcafb3e 100644 --- a/win32_30_nv8/gta_default/gta_defaultPS10.asm +++ b/win32_30_nv8/gta_default/gta_defaultPS10.asm @@ -23,6 +23,7 @@ // float4 globalFogColorN; // float4 globalFogParams; // float4 globalScalars; +// float4 globalScreenSize; // // // Registers: @@ -39,6 +40,7 @@ // globalFogParams c41 1 // globalFogColor c42 1 // globalFogColorN c43 1 +// globalScreenSize c44 1 // gShadowParam18192021 c53 1 // gFacetCentre c54 1 // gShadowParam14151617 c56 1 @@ -54,17 +56,17 @@ ps_3_0 def c0, 0, -1, -0, 9.99999975e-006 def c1, -0.5, 0.5, 0.9, 1.5 - def c2, 0.0833333358, 0.212500006, 0.715399981, 0.0720999986 + def c2, 0.25, 0.212500006, 0.715399981, 0.0720999986 def c3, 3.99600005, 4, 0.125, 0.25 def c4, 1.00000001e-007, 0, 0, 0 def c5, 1, -1, 0, -0 def c6, 0, 0, 0, 0.5 - def c7, 0.2703204087, 0.4206267664, 0.4548159977, 0.2077075065 - def c8, 0.4949107209, -0.07115741914, 0.3778747872, -0.327430367 - def c9,
0.1408662784, -0.4797464868, -0.1408662784, -0.4797464868 - def c10, -0.3778747872, -0.327430367, -0.4949107209, -0.07115741914 - def c11, -0.4548159977, 0.2077075065, -0.2703204087, 0.4206267664 - def c98, 0.5, 2, 0.1, 0 + def c7, -0.25, 1, -1, -0.07 + def c8, 0.159154937, 0.5, 6.28318548, -3.14159274 + def c9, 3, 7.13800001, 0.00012207031, 0.00048828125 + def c10, 0.75, -0.5, 0.5, 0 + def c11, 0.25, 0.5, 0.75, 4.8 + def c12, 0.5, 0.25, 0.125, 1 dcl_texcoord v0.xy dcl_texcoord1 v1 dcl_color v2.xw @@ -119,68 +121,50 @@ mul r3.w, r2.w, r3.w mul r3.w, r3.w, r3.w mul r3.w, r3.w, c1.w - mov r4.y, c53.y - mul r6.xy, r4.yy, c98.xy - add r2.z, r2.z, -c98.z - mad r4.xz, r6.xy, c11.xyyw, r2.xyyw - texld r5, r4.xzzw, s15 - add r4.x, r2.z, -r5.x - cmp r4.x, r4.x, -c0.y, -c0.z - mad r4.zw, r6.xyxy, c11, r2.xyxy - texld r5, r4.zwzw, s15 - add r4.z, r2.z, -r5.x - cmp r4.z, r4.z, -c0.y, -c0.z - add r4.x, r4.x, r4.z - mad r4.zw, r6.xyxy, c10.xyxy, r2.xyxy - texld r5, r4.zwzw, s15 - add r4.z, r2.z, -r5.x - cmp r4.z, r4.z, -c0.y, -c0.z - add r4.x, r4.x, r4.z - mad r4.zw, r6.xyxy, c10, r2.xyxy - texld r5, r4.zwzw, s15 - add r4.z, r2.z, -r5.x - cmp r4.z, r4.z, -c0.y, -c0.z - add r4.x, r4.x, r4.z - mad r4.zw, r6.xyxy, c9.xyxy, r2.xyxy - texld r5, r4.zwzw, s15 - add r4.z, r2.z, -r5.x - cmp r4.z, r4.z, -c0.y, -c0.z - add r4.x, r4.x, r4.z - mad r4.zw, r6.xyxy, c9, r2.xyxy - texld r5, r4.zwzw, s15 - add r4.z, r2.z, -r5.x - cmp r4.z, r4.z, -c0.y, -c0.z - add r4.x, r4.x, r4.z - mad r4.zw, r6.xyxy, c8.xyxy, r2.xyxy - texld r5, r4.zwzw, s15 - add r4.z, r2.z, -r5.x - cmp r4.z, r4.z, -c0.y, -c0.z - add r4.x, r4.x, r4.z - mad r4.zw, r6.xyxy, c8, r2.xyxy - texld r5, r4.zwzw, s15 - add r4.z, r2.z, -r5.x - cmp r4.z, r4.z, -c0.y, -c0.z - add r4.x, r4.x, r4.z - mad r4.zw, r6.xyxy, c7.xyxy, r2.xyxy - texld r5, r4.zwzw, s15 - add r4.z, r2.z, -r5.x - cmp r4.z, r4.z, -c0.y, -c0.z - add r4.x, r4.x, r4.z - mad r4.zw, r6.xyxy, c7, r2.xyxy - texld r5, r4.zwzw, s15 - add r4.z, r2.z, -r5.x - cmp r4.z, r4.z, 
-c0.y, -c0.z - add r4.x, r4.x, r4.z - mad r4.zw, r6.xyxy, c6.xyxy, r2.xyxy - texld r5, r4.zwzw, s15 - add r4.z, r2.z, -r5.x - cmp r4.z, r4.z, -c0.y, -c0.z - add r4.x, r4.x, r4.z - mad r2.xy, r6.xy, c6.zwzw, r2 - texld r5, r2, s15 - add r2.x, r2.z, -r5.x - cmp r2.x, r2.x, -c0.y, -c0.z - add r2.x, r4.x, r2.x + + add r21.z, r2.z, c7.w // depth bias + + mov r21.xy, c53.xy + max r21.xy, r21.xy, c9.zw // prevents overly sharp shadows when using ShadowResFix + mul r21.xy, r21.xy, c11.ww // *4.8 (c11.w); NOTE(review): comment originally read "*2.4 instead of *3 because CSM resolutions are multiples of 256 instead of 320" - confirm whether 2.4 or 4.8 is intended + + add r27.xyz, r2.x, -c11.xyz + cmp r27.w, r27.x, c12.x, c12.w // cascade 1-2 + cmp r27.w, r27.y, c12.y, r27.w // cascade 2-3 + cmp r27.w, r27.z, c12.z, r27.w // cascade 3-4 + mul r21.xy, r21.xy, r27.w // texel size multiplier + + mul r23.xy, c44.zw, vPos + mov r22.xy, c9.xy + mul r22.xy, r22.xy, c44.xy // r22.xy * screen dimensions (c44.xy) + dp2add r22.y, r23, r22, c5.z // r23.x * r22.x + r23.y * r22.y + mad r22.y, r22.y, c8.x, c8.y + frc r22.y, r22.y + mad r22.y, r22.y, c8.z, c8.w // r22.y * 2pi - pi + sincos r23.xy, r22.y // sine & cosine of r22.y + mul r24, r23.yxxy, c7.xxyz + mul r23, r23.yxxy, c10.xxyz + + mad r25.xy, r24.xy, r21.xy, r2.xy // offset * texel size + UV + texld r25, r25, s15 // sample #1 + mov r26.x, r25.x // copy to r26.x + + mad r25.xy, r24.zw, r21.xy, r2.xy // offset * texel size + UV + texld r25, r25, s15 // sample #2 + mov r26.y, r25.x // copy to r26.y + + mad r25.xy, r23.xy, r21.xy, r2.xy // offset * texel size + UV + texld r25, r25, s15 // sample #3 + mov r26.z, r25.x // copy to r26.z + + mad r25.xy, r23.zw, r21.xy, r2.xy // offset * texel size + UV + texld r25, r25, s15 // sample #4 + mov r26.w, r25.x // copy to r26.w + + add r26, r21.z, -r26 + cmp r26, r26, c5.x, c5.z // depth test: 1 where r21.z >= sample depth, else 0 + dp4 r2.x, r26, c5.x // sum + mad r2.x, r2.x, c2.x, r3.w add r2.y, r2.w, -c53.w cmp r2.yz, r2.y, c5.xxyw, c5.xzww diff --git a/win32_30_nv8/gta_default/gta_defaultPS11.asm b/win32_30_nv8/gta_default/gta_defaultPS11.asm index
c435738..9c054d2 100644 --- a/win32_30_nv8/gta_default/gta_defaultPS11.asm +++ b/win32_30_nv8/gta_default/gta_defaultPS11.asm @@ -35,6 +35,7 @@ // float4 globalFogColorN; // float4 globalFogParams; // float4 globalScalars; +// float4 globalScreenSize; // // // Registers: @@ -63,6 +64,7 @@ // globalFogParams c41 1 // globalFogColor c42 1 // globalFogColorN c43 1 +// globalScreenSize c44 1 // gShadowParam18192021 c53 1 // gFacetCentre c54 1 // gShadowParam14151617 c56 1 @@ -78,17 +80,17 @@ ps_3_0 def c0, 0, -1, -0, 9.99999975e-006 def c1, -0.5, 0.5, 0.9, 1.5 - def c2, 0.0833333358, -0.100000001, 1.11111116, 1.00000001e-007 + def c2, 0.25, -0.100000001, 1.11111116, 1.00000001e-007 def c3, 3.99600005, 4, 0.125, 0.25 def c4, 0.212500006, 0.715399981, 0.0720999986, 0 def c5, 1, -1, 0, -0 def c6, 0, 0, 0, 0.5 - def c7, 0.2703204087, 0.4206267664, 0.4548159977, 0.2077075065 - def c8, 0.4949107209, -0.07115741914, 0.3778747872, -0.327430367 - def c9, 0.1408662784, -0.4797464868, -0.1408662784, -0.4797464868 - def c10, -0.3778747872, -0.327430367, -0.4949107209, -0.07115741914 - def c11, -0.4548159977, 0.2077075065, -0.2703204087, 0.4206267664 - def c98, 0.5, 2, 0.1, 0 + def c7, -0.25, 1, -1, -0.07 + def c8, 0.159154937, 0.5, 6.28318548, -3.14159274 + def c9, 3, 7.13800001, 0.00012207031, 0.00048828125 + def c10, 0.75, -0.5, 0.5, 0 + def c11, 0.25, 0.5, 0.75, 4.8 + def c12, 0.5, 0.25, 0.125, 1 dcl_texcoord v0.xy dcl_texcoord1 v1 dcl_color v2.xw @@ -143,68 +145,50 @@ mul r3.w, r2.w, r3.w mul r3.w, r3.w, r3.w mul r3.w, r3.w, c1.w - mov r5.y, c53.y - mul r7.xy, r5.yy, c98.xy - add r4.z, r4.z, -c98.z - mad r5.xz, r7.xy, c11.xyyw, r4.xyyw - texld r6, r5.xzzw, s15 - add r4.w, r4.z, -r6.x - cmp r4.w, r4.w, -c0.y, -c0.z - mad r5.xz, r7.xy, c11.zyww, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c0.y, -c0.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c10.xyyw, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c0.y, -c0.z - add 
r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c10.zyww, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c0.y, -c0.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c9.xyyw, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c0.y, -c0.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c9.zyww, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c0.y, -c0.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c8.xyyw, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c0.y, -c0.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c8.zyww, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c0.y, -c0.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c7.xyyw, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c0.y, -c0.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c7.zyww, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c0.y, -c0.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c6.xyyw, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, -c0.y, -c0.z - add r4.w, r4.w, r5.x - mad r4.xy, r7.xy, c6.zwzw, r4 - texld r5, r4, s15 - add r4.x, r4.z, -r5.x - cmp r4.x, r4.x, -c0.y, -c0.z - add r4.x, r4.w, r4.x + + add r21.z, r4.z, c7.w // depth bias + + mov r21.xy, c53.xy + max r21.xy, r21.xy, c9.zw // prevents overly sharp shadows when using ShadowResFix + mul r21.xy, r21.xy, c11.ww // *4.8 (c11.w); NOTE(review): comment originally read "*2.4 instead of *3 because CSM resolutions are multiples of 256 instead of 320" - confirm whether 2.4 or 4.8 is intended + + add r27.xyz, r4.x, -c11.xyz + cmp r27.w, r27.x, c12.x, c12.w // cascade 1-2 + cmp r27.w, r27.y, c12.y, r27.w // cascade 2-3 + cmp r27.w, r27.z, c12.z, r27.w // cascade 3-4 + mul r21.xy, r21.xy, r27.w // texel size multiplier + + mul r23.xy, c44.zw, vPos + mov r22.xy, c9.xy + mul r22.xy, r22.xy, c44.xy // r22.xy * screen dimensions (c44.xy) + dp2add r22.y, r23, r22, c5.z // r23.x * r22.x + r23.y * r22.y + mad r22.y, r22.y, c8.x, c8.y + frc r22.y, r22.y + mad r22.y, r22.y,
c8.z, c8.w // r22.y * 2pi - pi + sincos r23.xy, r22.y // sine & cosine of r22.y + mul r24, r23.yxxy, c7.xxyz + mul r23, r23.yxxy, c10.xxyz + + mad r25.xy, r24.xy, r21.xy, r4.xy // offset * texel size + UV + texld r25, r25, s15 // sample #1 + mov r26.x, r25.x // copy to r26.x + + mad r25.xy, r24.zw, r21.xy, r4.xy // offset * texel size + UV + texld r25, r25, s15 // sample #2 + mov r26.y, r25.x // copy to r26.y + + mad r25.xy, r23.xy, r21.xy, r4.xy // offset * texel size + UV + texld r25, r25, s15 // sample #3 + mov r26.z, r25.x // copy to r26.z + + mad r25.xy, r23.zw, r21.xy, r4.xy // offset * texel size + UV + texld r25, r25, s15 // sample #4 + mov r26.w, r25.x // copy to r26.w + + add r26, r21.z, -r26 + cmp r26, r26, c5.x, c5.z // depth test: 1 where r21.z >= sample depth, else 0 + dp4 r4.x, r26, c5.x // sum + mad r3.w, r4.x, c2.x, r3.w add r2.w, r2.w, -c53.w cmp r4.xy, r2.w, c5, c5.zwzw diff --git a/win32_30_nv8/gta_default/gta_defaultPS12.asm b/win32_30_nv8/gta_default/gta_defaultPS12.asm index 8296817..59c43e2 100644 --- a/win32_30_nv8/gta_default/gta_defaultPS12.asm +++ b/win32_30_nv8/gta_default/gta_defaultPS12.asm @@ -43,6 +43,7 @@ // sampler2D gShadowZSamplerDir; // row_major float4x4 gViewInverse; // float4 globalScalars; +// float4 globalScreenSize; // // // Registers: @@ -72,6 +73,7 @@ // gLightAmbient0 c37 1 // gLightAmbient1 c38 1 // globalScalars c39 1 +// globalScreenSize c44 1 // gShadowParam18192021 c53 1 // gFacetCentre c54 1 // gShadowParam14151617 c56 1 @@ -94,16 +96,16 @@ ps_3_0 def c0, 9.99999975e-006, -0.5, 0.5, -0.25 def c1, 0.9, 1, 0, 1.5 - def c2, 0.0833333358, -0.100000001, 1.11111116, 3.99600005 + def c2, 0.25, -0.100000001, 1.11111116, 3.99600005 def c3, 4, 0.125, 0, 0 def c4, 1, -1, 0, -0 def c5, 0, 0, 0, 0.5 - def c6, 0.2703204087, 0.4206267664, 0.4548159977, 0.2077075065 - def c7, 0.4949107209, -0.07115741914, 0.3778747872, -0.327430367 - def c8, 0.1408662784, -0.4797464868, -0.1408662784, -0.4797464868 - def c9, -0.3778747872, -0.327430367, -0.4949107209, -0.07115741914 - def c10,
-0.4548159977, 0.2077075065, -0.2703204087, 0.4206267664 - def c98, 0.5, 2, 0.1, 0 + def c6, -0.25, 1, -1, -0.07 + def c7, 0.159154937, 0.5, 6.28318548, -3.14159274 + def c8, 3, 7.13800001, 0.00012207031, 0.00048828125 + def c9, 0.75, -0.5, 0.5, 0 + def c10, 0.25, 0.5, 0.75, 4.8 + def c11, 0.5, 0.25, 0.125, 1 dcl_texcoord v0.xy dcl_texcoord1 v1.xyz dcl_color v2 @@ -143,68 +145,50 @@ mul r3.w, r2.w, r3.w mul r3.w, r3.w, r3.w mul r3.w, r3.w, c1.w - mov r5.y, c53.y - mul r7.xy, r5.yy, c98.xy - add r4.z, r4.z, -c98.z - mad r5.xz, r7.xy, c10.xyyw, r4.xyyw - texld r6, r5.xzzw, s15 - add r4.w, r4.z, -r6.x - cmp r4.w, r4.w, c1.y, c1.z - mad r5.xz, r7.xy, c10.zyww, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, c1.y, c1.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c9.xyyw, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, c1.y, c1.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c9.zyww, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, c1.y, c1.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c8.xyyw, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, c1.y, c1.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c8.zyww, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, c1.y, c1.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c7.xyyw, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, c1.y, c1.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c7.zyww, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, c1.y, c1.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c6.xyyw, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, c1.y, c1.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c6.zyww, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, c1.y, c1.z - add r4.w, r4.w, r5.x - mad r5.xz, r7.xy, c5.xyyw, r4.xyyw - texld r6, r5.xzzw, s15 - add r5.x, r4.z, -r6.x - cmp r5.x, r5.x, c1.y, c1.z - add 
r4.w, r4.w, r5.x - mad r4.xy, r7.xy, c5.zwzw, r4 - texld r5, r4, s15 - add r4.x, r4.z, -r5.x - cmp r4.x, r4.x, c1.y, c1.z - add r4.x, r4.w, r4.x + + add r21.z, r4.z, c6.w // depth bias + + mov r21.xy, c53.xy + max r21.xy, r21.xy, c8.zw // prevents overly sharp shadows when using ShadowResFix + mul r21.xy, r21.xy, c10.ww // *4.8 (c10.w); NOTE(review): comment originally read "*2.4 instead of *3 because CSM resolutions are multiples of 256 instead of 320" - confirm whether 2.4 or 4.8 is intended + + add r27.xyz, r4.x, -c10.xyz + cmp r27.w, r27.x, c11.x, c11.w // cascade 1-2 + cmp r27.w, r27.y, c11.y, r27.w // cascade 2-3 + cmp r27.w, r27.z, c11.z, r27.w // cascade 3-4 + mul r21.xy, r21.xy, r27.w // texel size multiplier + + mul r23.xy, c44.zw, vPos + mov r22.xy, c8.xy + mul r22.xy, r22.xy, c44.xy // r22.xy * screen dimensions (c44.xy) + dp2add r22.y, r23, r22, c4.z // r23.x * r22.x + r23.y * r22.y + mad r22.y, r22.y, c7.x, c7.y + frc r22.y, r22.y + mad r22.y, r22.y, c7.z, c7.w // r22.y * 2pi - pi + sincos r23.xy, r22.y // sine & cosine of r22.y + mul r24, r23.yxxy, c6.xxyz + mul r23, r23.yxxy, c9.xxyz + + mad r25.xy, r24.xy, r21.xy, r4.xy // offset * texel size + UV + texld r25, r25, s15 // sample #1 + mov r26.x, r25.x // copy to r26.x + + mad r25.xy, r24.zw, r21.xy, r4.xy // offset * texel size + UV + texld r25, r25, s15 // sample #2 + mov r26.y, r25.x // copy to r26.y + + mad r25.xy, r23.xy, r21.xy, r4.xy // offset * texel size + UV + texld r25, r25, s15 // sample #3 + mov r26.z, r25.x // copy to r26.z + + mad r25.xy, r23.zw, r21.xy, r4.xy // offset * texel size + UV + texld r25, r25, s15 // sample #4 + mov r26.w, r25.x // copy to r26.w + + add r26, r21.z, -r26 + cmp r26, r26, c4.x, c4.z // depth test: 1 where r21.z >= sample depth, else 0 + dp4 r4.x, r26, c4.x // sum + mad r3.w, r4.x, c2.x, r3.w add r2.w, r2.w, -c53.w cmp r4.xy, r2.w, c4, c4.zwzw