diff --git a/package/Shaders/Common/Permutation.hlsli b/package/Shaders/Common/Permutation.hlsli index 1ac28e6afe..b851c8c87e 100644 --- a/package/Shaders/Common/Permutation.hlsli +++ b/package/Shaders/Common/Permutation.hlsli @@ -60,8 +60,7 @@ namespace Permutation static const uint InReflection = (1 << 1); static const uint IsBeastRace = (1 << 2); static const uint EffectShadows = (1 << 3); - static const uint IsDecal = (1 << 4); - static const uint IsTree = (1 << 5); + static const uint IsTree = (1 << 4); } namespace ExtraFeatureFlags diff --git a/package/Shaders/Lighting.hlsl b/package/Shaders/Lighting.hlsl index c9cb950209..bc9c9d209e 100644 --- a/package/Shaders/Lighting.hlsl +++ b/package/Shaders/Lighting.hlsl @@ -2344,7 +2344,7 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) # if defined(DEFERRED) bool useScreenSpaceShadows = true; # else - bool useScreenSpaceShadows = inWorld && !SharedData::InInterior && Permutation::ExtraShaderDescriptor & Permutation::ExtraFlags::IsDecal; + bool useScreenSpaceShadows = inWorld && !SharedData::InInterior; # endif if (useScreenSpaceShadows) diff --git a/src/Deferred.cpp b/src/Deferred.cpp index 697daaa3fe..8956c7e7e6 100644 --- a/src/Deferred.cpp +++ b/src/Deferred.cpp @@ -336,8 +336,7 @@ void Deferred::StartDeferred() { auto context = globals::d3d::context; - static REL::Relocation perFrame{ REL::RelocationID(524768, 411384) }; - ID3D11Buffer* buffers[1] = { *perFrame.get() }; + ID3D11Buffer* buffers[1] = { *globals::game::perFrame.get() }; ID3D11Buffer* vrBuffer = nullptr; @@ -768,9 +767,12 @@ void Deferred::Hooks::Main_RenderShadowMaps::thunk() void Deferred::Hooks::Main_RenderWorld::thunk(bool a1) { - globals::state->inWorld = true; + auto* const state = globals::state; + state->permutationData.ExtraShaderDescriptor |= static_cast(State::ExtraShaderDescriptors::InWorld); + state->inWorld = true; func(a1); - globals::state->inWorld = false; + state->inWorld = false; + 
state->permutationData.ExtraShaderDescriptor &= ~static_cast(State::ExtraShaderDescriptors::InWorld); }; void Deferred::Hooks::Main_RenderWorld_Start::thunk(RE::BSBatchRenderer* This, uint32_t StartRange, uint32_t EndRanges, uint32_t RenderFlags, int GeometryGroup) @@ -796,9 +798,7 @@ void Deferred::Hooks::Main_RenderWorld_BlendedDecals::thunk(RE::BSShaderAccumula // Deferred blended decals - deferred->inDecals = true; func(This, RenderFlags); - deferred->inDecals = false; deferred->EndDeferred(); @@ -808,9 +808,33 @@ void Deferred::Hooks::Main_RenderWorld_BlendedDecals::thunk(RE::BSShaderAccumula void Deferred::Hooks::BSCubeMapCamera_RenderCubemap::thunk(RE::NiAVObject* camera, int a2, bool a3, bool a4, bool a5) { auto deferred = globals::deferred; + auto state = globals::state; - deferred->inReflections = true; deferred->ReflectionsPrepasses(); + state->permutationData.ExtraShaderDescriptor |= static_cast(State::ExtraShaderDescriptors::IsReflections); func(camera, a2, a3, a4, a5); - deferred->inReflections = false; + state->permutationData.ExtraShaderDescriptor &= ~static_cast(State::ExtraShaderDescriptors::IsReflections); +} + +void Deferred::Hooks::Main_RenderFirstPersonView::thunk(bool a1, bool a2) +{ + auto* const state = globals::state; + state->permutationData.ExtraShaderDescriptor |= static_cast(State::ExtraShaderDescriptors::InWorld); + func(a1, a2); + state->permutationData.ExtraShaderDescriptor &= ~static_cast(State::ExtraShaderDescriptors::InWorld); +} + +void Deferred::Hooks::Renderer_ResetState::thunk(void* This) +{ + func(This); + + auto* const state = globals::state; + auto* const context = globals::d3d::context; + + ID3D11Buffer* buffers[3] = { state->permutationCB->CB(), state->sharedDataCB->CB(), state->featureDataCB->CB() }; + context->PSSetConstantBuffers(4, 3, buffers); + context->CSSetConstantBuffers(5, 2, buffers + 1); + + auto* singleton = globals::truePBR; + singleton->SetupFrame(); } diff --git a/src/Deferred.h b/src/Deferred.h index 
0f6080c39c..3413db7bd2 100644 --- a/src/Deferred.h +++ b/src/Deferred.h @@ -46,8 +46,6 @@ class Deferred ID3D11ComputeShader* mainCompositeCS = nullptr; ID3D11ComputeShader* mainCompositeInteriorCS = nullptr; - bool inDecals = false; - bool inReflections = false; bool deferredPass = false; Texture2D* prevDiffuseAmbientTexture = nullptr; @@ -130,6 +128,18 @@ class Deferred static inline REL::Relocation func; }; + struct Main_RenderFirstPersonView + { + static void thunk(bool a1, bool a2); + static inline REL::Relocation func; + }; + + struct Renderer_ResetState + { + static void thunk(void* This); + static inline REL::Relocation func; + }; + static void Install() { stl::write_vfunc<0x35, BSCubeMapCamera_RenderCubemap>(RE::VTABLE_BSCubeMapCamera[0]); @@ -140,6 +150,11 @@ class Deferred stl::write_thunk_call(REL::RelocationID(99938, 106583).address() + REL::Relocate(0x8E, 0x84)); stl::write_thunk_call(REL::RelocationID(99938, 106583).address() + REL::Relocate(0x319, 0x308, 0x321)); + if (!REL::Module::IsVR()) + stl::write_thunk_call(REL::RelocationID(35560, 36559).address() + REL::Relocate(0x944, 0x954)); + + stl::detour_thunk(REL::RelocationID(75570, 77371)); + stl::write_vfunc<0x2, BSImagespaceShaderHDRTonemapBlendCinematic_SetupTechnique>(RE::VTABLE_BSImagespaceShaderHDRTonemapBlendCinematic[0]); stl::write_vfunc<0x2, BSImagespaceShaderHDRTonemapBlendCinematicFade_SetupTechnique>(RE::VTABLE_BSImagespaceShaderHDRTonemapBlendCinematicFade[0]); diff --git a/src/Features/ExtendedTranslucency.cpp b/src/Features/ExtendedTranslucency.cpp index 62fac27fba..b48cb9284d 100644 --- a/src/Features/ExtendedTranslucency.cpp +++ b/src/Features/ExtendedTranslucency.cpp @@ -21,16 +21,14 @@ ExtendedTranslucency* ExtendedTranslucency::GetSingleton() void ExtendedTranslucency::BSLightingShader_SetupGeometry(RE::BSRenderPass* pass) { - globals::state->currentExtraFeatureDescriptor &= ~(ExtraFeatureDescriptorMask << ExtraFeatureDescriptorShift); + 
globals::state->permutationData.ExtraFeatureDescriptor &= ~(ExtraFeatureDescriptorMask << ExtraFeatureDescriptorShift); // TODO: PERFORMANCE: Caching the feature descriptor in map if this get more complex auto& unknownProperty = pass->geometry->GetGeometryRuntimeData().properties[RE::BSGeometry::States::kProperty]; - static const REL::Relocation NiAlphaPropertyRTTI{ RE::NiAlphaProperty::Ni_RTTI }; - auto alphaProperty = unknownProperty && unknownProperty->GetRTTI() == NiAlphaPropertyRTTI.get() ? static_cast(unknownProperty.get()) : nullptr; + auto alphaProperty = unknownProperty && unknownProperty->GetRTTI() == globals::rtti::NiAlphaPropertyRTTI.get() ? static_cast(unknownProperty.get()) : nullptr; // Check alpha property exists and blending is enabled if (alphaProperty && alphaProperty->GetAlphaBlending()) { if (auto* data = pass->geometry->GetExtraData(NiExtraDataName_AnisotropicAlphaMaterial)) { - static const REL::Relocation NiIntegerExtraDataRTTI{ RE::NiIntegerExtraData::Ni_RTTI }; - if (data->GetRTTI() == NiIntegerExtraDataRTTI.get()) { + if (data->GetRTTI() == globals::rtti::NiIntegerExtraDataRTTI.get()) { uint32_t material = static_cast(static_cast(data)->value) & ExtraFeatureDescriptorMask; if (material == MaterialModel::Disabled) { // MaterialModel::Disabled (0) is the flag when this extra does not exist @@ -38,7 +36,7 @@ void ExtendedTranslucency::BSLightingShader_SetupGeometry(RE::BSRenderPass* pass // Ensure this is disabled by using the ForceDisabled flag material = MaterialModel::ForceDisabled; } - globals::state->currentExtraFeatureDescriptor |= (material << ExtraFeatureDescriptorShift); + globals::state->permutationData.ExtraFeatureDescriptor |= (material << ExtraFeatureDescriptorShift); // TODO: Per-material settings from Nif // Mods supporting this feature should adjust their alpha value in texture already @@ -46,7 +44,7 @@ void ExtendedTranslucency::BSLightingShader_SetupGeometry(RE::BSRenderPass* pass } } } else { - 
globals::state->currentExtraFeatureDescriptor |= ((MaterialModel::ForceDisabled) << ExtraFeatureDescriptorShift); + globals::state->permutationData.ExtraFeatureDescriptor |= ((MaterialModel::ForceDisabled) << ExtraFeatureDescriptorShift); } } diff --git a/src/Features/LightLimitFix.cpp b/src/Features/LightLimitFix.cpp index 4b77e31df9..b9c81b0b4c 100644 --- a/src/Features/LightLimitFix.cpp +++ b/src/Features/LightLimitFix.cpp @@ -301,15 +301,11 @@ void LightLimitFix::BSLightingShader_SetupGeometry_Before(RE::BSRenderPass* a_pa } } -void LightLimitFix::BSLightingShader_SetupGeometry_GeometrySetupConstantPointLights(RE::BSRenderPass* a_pass, DirectX::XMMATRIX&, uint32_t, uint32_t, float, Space) +void LightLimitFix::BSLightingShader_SetupGeometry_GeometrySetupConstantPointLights(RE::BSRenderPass* a_pass) { - auto shaderCache = globals::shaderCache; auto isl = globals::features::inverseSquareLighting; - if (!shaderCache->IsEnabled()) - return; - - auto accumulator = RE::BSGraphics::BSShaderAccumulator::GetCurrentAccumulator(); + auto accumulator = *globals::game::currentAccumulator.get(); bool inWorld = accumulator->GetRuntimeData().activeShadowSceneNode == globals::game::smState->shadowSceneNode[0]; strictLightDataTemp.NumStrictLights = inWorld ? 
0 : (a_pass->numLights - 1); @@ -335,7 +331,7 @@ void LightLimitFix::BSLightingShader_SetupGeometry_GeometrySetupConstantPointLig SetLightPosition(light, niLight->world.translate, inWorld); - if (bsLight->IsShadowLight()) { + if (i < a_pass->numShadowLights) { auto* shadowLight = static_cast(bsLight); GET_INSTANCE_MEMBER(shadowLightIndex, shadowLight); light.shadowMaskIndex = shadowLightIndex; @@ -362,7 +358,7 @@ void LightLimitFix::BSLightingShader_SetupGeometry_After(RE::BSRenderPass*) if (!shaderCache->IsEnabled()) return; - auto accumulator = RE::BSGraphics::BSShaderAccumulator::GetCurrentAccumulator(); + auto accumulator = *globals::game::currentAccumulator.get(); auto shadowSceneNode = smState->shadowSceneNode[0]; @@ -497,14 +493,14 @@ LightLimitFix::ParticleLightReference LightLimitFix::GetParticleLightConfigs(RE: // see https://www.nexusmods.com/skyrimspecialedition/articles/1391 if (settings.EnableParticleLights) { - if (auto shaderProperty = netimmerse_cast(a_pass->shaderProperty)) { + if (auto shaderProperty = a_pass->shaderProperty->GetRTTI() == globals::rtti::BSEffectShaderPropertyRTTI.get() ? static_cast(a_pass->shaderProperty) : nullptr) { if (!shaderProperty->lightData) { if (auto material = shaderProperty->GetMaterial()) { // Check if it's a valid particle light bool billboard = false; - if (!netimmerse_cast(a_pass->geometry)) { + if (a_pass->geometry->GetRTTI() != globals::rtti::NiParticleSystemRTTI.get()) { if (auto parent = a_pass->geometry->parent) { - if (auto billboardNode = netimmerse_cast(parent)) { + if (auto billboardNode = parent->GetRTTI() == globals::rtti::NiBillboardNodeRTTI.get() ? 
static_cast(parent) : nullptr) { billboard = true; } else { return { false }; @@ -1069,12 +1065,6 @@ float LightLimitFix::Hooks::AIProcess_CalculateLightValue_GetLuminance::thunk(RE return ret; } -void LightLimitFix::Hooks::BSLightingShader_SetupGeometry_GeometrySetupConstantPointLights::thunk(RE::BSGraphics::PixelShader* PixelShader, RE::BSRenderPass* Pass, DirectX::XMMATRIX& Transform, uint32_t LightCount, uint32_t ShadowLightCount, float WorldScale, Space RenderSpace) -{ - globals::features::lightLimitFix->BSLightingShader_SetupGeometry_GeometrySetupConstantPointLights(Pass, Transform, LightCount, ShadowLightCount, WorldScale, RenderSpace); - func(PixelShader, Pass, Transform, LightCount, ShadowLightCount, WorldScale, RenderSpace); -} - void LightLimitFix::Hooks::NiNode_Destroy::thunk(RE::NiNode* This) { globals::features::lightLimitFix->CleanupParticleLights(This); diff --git a/src/Features/LightLimitFix.h b/src/Features/LightLimitFix.h index ade8a09717..b86c20cafd 100644 --- a/src/Features/LightLimitFix.h +++ b/src/Features/LightLimitFix.h @@ -225,13 +225,7 @@ struct LightLimitFix : Feature void BSLightingShader_SetupGeometry_Before(RE::BSRenderPass* a_pass); - enum class Space - { - World = 0, - Model = 1, - }; - - void BSLightingShader_SetupGeometry_GeometrySetupConstantPointLights(RE::BSRenderPass* a_pass, DirectX::XMMATRIX& Transform, uint32_t, uint32_t, float WorldScale, Space RenderSpace); + void BSLightingShader_SetupGeometry_GeometrySetupConstantPointLights(RE::BSRenderPass* a_pass); void BSLightingShader_SetupGeometry_After(RE::BSRenderPass* a_pass); @@ -269,12 +263,6 @@ struct LightLimitFix : Feature static inline REL::Relocation func; }; - struct BSLightingShader_SetupGeometry_GeometrySetupConstantPointLights - { - static void thunk(RE::BSGraphics::PixelShader* PixelShader, RE::BSRenderPass* Pass, DirectX::XMMATRIX& Transform, uint32_t LightCount, uint32_t ShadowLightCount, float WorldScale, Space RenderSpace); - static inline REL::Relocation func; 
- }; - struct NiNode_Destroy { static void thunk(RE::NiNode* This); @@ -303,8 +291,6 @@ struct LightLimitFix : Feature stl::write_vfunc<0x6, BSEffectShader_SetupGeometry>(RE::VTABLE_BSEffectShader[0]); stl::write_vfunc<0x6, BSWaterShader_SetupGeometry>(RE::VTABLE_BSWaterShader[0]); - stl::write_thunk_call(REL::RelocationID(100565, 107300).address() + REL::Relocate(0x523, 0xB0E, 0x5fe)); - stl::detour_thunk(REL::RelocationID(68937, 70288)); stl::write_thunk_call(REL::RelocationID(100994, 107781).address() + 0x92); diff --git a/src/Features/ScreenSpaceShadows.cpp b/src/Features/ScreenSpaceShadows.cpp index d48480f001..0ed56e97ac 100644 --- a/src/Features/ScreenSpaceShadows.cpp +++ b/src/Features/ScreenSpaceShadows.cpp @@ -91,7 +91,7 @@ void ScreenSpaceShadows::DrawShadows() auto renderer = globals::game::renderer; auto context = globals::d3d::context; - auto accumulator = RE::BSGraphics::BSShaderAccumulator::GetCurrentAccumulator(); + auto accumulator = *globals::game::currentAccumulator.get(); auto dirLight = skyrim_cast(accumulator->GetRuntimeData().activeShadowSceneNode->GetRuntimeData().sunLight->light.get()); auto& directionNi = dirLight->GetWorldDirection(); diff --git a/src/Features/SubsurfaceScattering.cpp b/src/Features/SubsurfaceScattering.cpp index 0525970b85..6bbec3bbed 100644 --- a/src/Features/SubsurfaceScattering.cpp +++ b/src/Features/SubsurfaceScattering.cpp @@ -370,7 +370,9 @@ void SubsurfaceScattering::BSLightingShader_SetupSkin(RE::BSRenderPass* a_pass) validMaterials = true; if (isBeastRace) - state->currentExtraDescriptor |= (uint)State::ExtraShaderDescriptors::IsBeastRace; + state->permutationData.ExtraShaderDescriptor |= (uint)State::ExtraShaderDescriptors::IsBeastRace; + else + state->permutationData.ExtraShaderDescriptor &= ~(uint)State::ExtraShaderDescriptors::IsBeastRace; } } } diff --git a/src/Features/TerrainHelper.cpp b/src/Features/TerrainHelper.cpp index 11c96620db..883076be4e 100644 --- a/src/Features/TerrainHelper.cpp +++ 
b/src/Features/TerrainHelper.cpp @@ -122,11 +122,33 @@ struct THExtendedRendererState void TerrainHelper::SetShaderResouces(ID3D11DeviceContext* a_context) { - for (uint32_t textureIndex = 0; textureIndex < THExtendedRendererState::NumPSTextures; ++textureIndex) { - if (thExtendedRendererState.PSResourceModifiedBits & (1 << textureIndex)) { - a_context->PSSetShaderResources(THExtendedRendererState::FirstPSTexture + textureIndex, 1, &thExtendedRendererState.PSTexture[textureIndex]); - } + uint32_t mask = thExtendedRendererState.PSResourceModifiedBits; + + if (mask == 0) [[likely]] { + return; // Nothing to update } + + constexpr uint32_t firstTexture = THExtendedRendererState::FirstPSTexture; + auto& textures = thExtendedRendererState.PSTexture; + + while (mask) { + // Find the position of the first set bit + uint32_t batchStart = std::countr_zero(mask); + + // Count consecutive 1s starting from batchStart + uint32_t shiftedMask = mask >> batchStart; + uint32_t batchCount = std::countr_one(shiftedMask); + + a_context->PSSetShaderResources( + firstTexture + batchStart, + batchCount, + &textures[batchStart]); + + // Clear the processed bits + uint32_t clearMask = ((1u << batchCount) - 1u) << batchStart; + mask &= ~clearMask; + } + thExtendedRendererState.PSResourceModifiedBits = 0; } @@ -160,10 +182,10 @@ void TerrainHelper::BSLightingShader_SetupMaterial(RE::BSLightingShaderMaterialB for (uint32_t textureI = 0; textureI < 6; ++textureI) { if (materialBase.parallax[textureI] != nullptr && materialBase.parallax[textureI] != stateData.defaultTextureNormalMap) { thExtendedRendererState.SetPSTexture(textureI, materialBase.parallax[textureI]->rendererTexture); - state->currentExtraFeatureDescriptor |= 1 << textureI; + state->permutationData.ExtraFeatureDescriptor |= 1 << textureI; } else { thExtendedRendererState.SetPSTexture(textureI, nullptr); - state->currentExtraFeatureDescriptor &= ~(1 << textureI); + state->permutationData.ExtraFeatureDescriptor &= ~(1 << textureI); 
} } } \ No newline at end of file diff --git a/src/Features/TerrainShadows.cpp b/src/Features/TerrainShadows.cpp index 46471bfc41..b36d1e01ee 100644 --- a/src/Features/TerrainShadows.cpp +++ b/src/Features/TerrainShadows.cpp @@ -320,7 +320,7 @@ void TerrainShadows::UpdateShadow() context->CSSetShaderResources(60, (uint)srvs.size(), srvs.data()); } - auto accumulator = RE::BSGraphics::BSShaderAccumulator::GetCurrentAccumulator(); + auto accumulator = *globals::game::currentAccumulator.get(); auto sunLight = skyrim_cast(accumulator->GetRuntimeData().activeShadowSceneNode->GetRuntimeData().sunLight->light.get()); if (!sunLight) return; diff --git a/src/Globals.cpp b/src/Globals.cpp index a6b9345435..10f02e8b2f 100644 --- a/src/Globals.cpp +++ b/src/Globals.cpp @@ -116,6 +116,17 @@ namespace globals RE::Setting* shadowMaskQuarter = nullptr; REL::Relocation perFrame; + REL::Relocation currentAccumulator; + } + + namespace rtti + { + REL::Relocation NiIntegerExtraDataRTTI; + REL::Relocation BSLightingShaderPropertyRTTI; + REL::Relocation BSEffectShaderPropertyRTTI; + REL::Relocation NiParticleSystemRTTI; + REL::Relocation NiBillboardNodeRTTI; + REL::Relocation NiAlphaPropertyRTTI; } State* state = nullptr; @@ -195,6 +206,18 @@ namespace globals ui = RE::UI::GetSingleton(); perFrame = { REL::RelocationID(524768, 411384) }; + + currentAccumulator = { REL::RelocationID(527650, 414600) }; + } + + { + using namespace rtti; + NiIntegerExtraDataRTTI = { RE::NiIntegerExtraData::Ni_RTTI }; + BSLightingShaderPropertyRTTI = { RE::BSLightingShaderProperty::Ni_RTTI }; + BSEffectShaderPropertyRTTI = { RE::BSEffectShaderProperty::Ni_RTTI }; + NiParticleSystemRTTI = { RE::NiParticleSystem::Ni_RTTI }; + NiBillboardNodeRTTI = { RE::NiBillboardNode::Ni_RTTI }; + NiAlphaPropertyRTTI = { RE::NiAlphaProperty::Ni_RTTI }; } d3d::device = reinterpret_cast(game::renderer->GetRuntimeData().forwarder); diff --git a/src/Globals.h b/src/Globals.h index e93bdf6d4f..9626dc28c9 100644 --- 
a/src/Globals.h +++ b/src/Globals.h @@ -116,6 +116,17 @@ namespace globals extern RE::Setting* bShadowsOnGrass; extern RE::Setting* shadowMaskQuarter; extern REL::Relocation perFrame; + extern REL::Relocation currentAccumulator; + } + + namespace rtti + { + extern REL::Relocation NiIntegerExtraDataRTTI; + extern REL::Relocation BSLightingShaderPropertyRTTI; + extern REL::Relocation BSEffectShaderPropertyRTTI; + extern REL::Relocation NiParticleSystemRTTI; + extern REL::Relocation NiBillboardNodeRTTI; + extern REL::Relocation NiAlphaPropertyRTTI; } extern State* state; diff --git a/src/Hooks.cpp b/src/Hooks.cpp index 8ee3cf8415..ff04ff4e95 100644 --- a/src/Hooks.cpp +++ b/src/Hooks.cpp @@ -124,11 +124,17 @@ bool Hooks::BSShader_BeginTechnique::thunk(RE::BSShader* shader, uint32_t vertex state->currentVertexDescriptor = vertexDescriptor; state->currentPixelDescriptor = pixelDescriptor; + state->permutationData.VertexShaderDescriptor = vertexDescriptor; + state->permutationData.PixelShaderDescriptor = pixelDescriptor; + state->modifiedVertexDescriptor = vertexDescriptor; state->modifiedPixelDescriptor = pixelDescriptor; state->ModifyShaderLookup(*shader, state->modifiedVertexDescriptor, state->modifiedPixelDescriptor); + // Only check against non-shader bits + state->permutationData.PixelShaderDescriptor &= ~state->modifiedPixelDescriptor; + bool shaderFound = func(shader, vertexDescriptor, pixelDescriptor, skipPixelShader); if (!shaderFound && shader->shaderType.get() != RE::BSShader::Type::Effect) { @@ -168,7 +174,7 @@ namespace EffectExtensions if (auto* shaderProperty = static_cast(pass->geometry->GetGeometryRuntimeData().properties[1].get())) { if (shaderProperty->flags.any(RE::BSShaderProperty::EShaderPropertyFlag::kUniformScale)) { auto state = globals::state; - state->currentExtraDescriptor |= (uint)State::ExtraShaderDescriptors::EffectShadows; + state->permutationData.ExtraShaderDescriptor |= (uint)State::ExtraShaderDescriptors::EffectShadows; } } } @@ 
-184,12 +190,12 @@ namespace LightingExtensions { func(shader, pass, renderFlags); - globals::state->isTree = false; + globals::state->permutationData.ExtraShaderDescriptor &= ~static_cast(State::ExtraShaderDescriptors::IsTree); if (auto userData = pass->geometry->GetUserData()) if (auto baseObject = userData->GetBaseObject()) if (baseObject->As()) - globals::state->isTree = true; + globals::state->permutationData.ExtraShaderDescriptor |= static_cast(State::ExtraShaderDescriptors::IsTree); } static inline REL::Relocation func; }; @@ -998,6 +1004,18 @@ namespace Hooks PatchMemory(Address, Data.begin(), Data.size()); } + struct BSLightingShader_SetupGeometry_GeometrySetupConstantPointLights + { + static void thunk(RE::BSGraphics::PixelShader* PixelShader, RE::BSRenderPass* Pass, DirectX::XMMATRIX& Transform, uint32_t LightCount, uint32_t ShadowLightCount, float WorldScale, uint32_t) + { + if (globals::features::lightLimitFix->loaded) + globals::features::lightLimitFix->BSLightingShader_SetupGeometry_GeometrySetupConstantPointLights(Pass); + else + func(PixelShader, Pass, Transform, LightCount, ShadowLightCount, WorldScale, 0); + } + static inline REL::Relocation func; + }; + /** * @brief Installs hooks, detours, and memory patches for graphics, input, and rendering subsystems. 
* @@ -1106,6 +1124,7 @@ namespace Hooks REL::Relocation(renderPassCacheCtor, 0x191 - 2).address(), reinterpret_cast(&passCountSE), 4); } + if (!REL::Module::IsVR()) { stl::write_thunk_call(REL::RelocationID(35565, 36564).address() + REL::Relocate(0x53, 0x6E)); stl::write_thunk_call(REL::RelocationID(35565, 36564).address() + REL::Relocate(0x5D2, 0xA97)); @@ -1135,6 +1154,8 @@ namespace Hooks REL::safe_write(setupGeometryUpdateRenderSpace + 0x378, patch3, sizeof(patch3)); } } + + stl::write_thunk_call(REL::RelocationID(100565, 107300).address() + REL::Relocate(0x523, 0xB0E, 0x5FE)); } /** diff --git a/src/Menu.cpp b/src/Menu.cpp index 4fa6a77ccc..e128865923 100644 --- a/src/Menu.cpp +++ b/src/Menu.cpp @@ -1495,7 +1495,7 @@ void Menu::DrawAdvancedSettings() } if (ImGui::TreeNodeEx("Addresses")) { auto Renderer = globals::game::renderer; - auto BSShaderAccumulator = RE::BSGraphics::BSShaderAccumulator::GetCurrentAccumulator(); + auto BSShaderAccumulator = *globals::game::currentAccumulator.get(); auto RendererShadowState = globals::game::shadowState; ADDRESS_NODE(Renderer) ADDRESS_NODE(BSShaderAccumulator) diff --git a/src/State.cpp b/src/State.cpp index 33d943f4d2..464e163d36 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -8,6 +8,7 @@ #include "Deferred.h" #include "FeatureIssues.h" #include "Features/CloudShadows.h" +#include "Features/PerformanceOverlay.h" #include "Features/TerrainBlending.h" #include "Features/TerrainHelper.h" #include "Menu.h" @@ -18,14 +19,12 @@ void State::Draw() { - auto lock = Lock(); auto shaderCache = globals::shaderCache; auto deferred = globals::deferred; auto terrainBlending = globals::features::terrainBlending; auto terrainHelper = globals::features::terrainHelper; auto cloudShadows = globals::features::cloudShadows; auto truePBR = globals::truePBR; - auto smState = globals::game::smState; auto context = globals::d3d::context; if (shaderCache->IsEnabled()) { @@ -40,111 +39,75 @@ void State::Draw() 
truePBR->SetShaderResouces(context); - if (!deferred->inReflections) { - if (auto accumulator = RE::BSGraphics::BSShaderAccumulator::GetCurrentAccumulator()) { - // Set an unused bit to indicate if we are rendering an object in the main rendering passes - if (accumulator->GetRuntimeData().activeShadowSceneNode == smState->shadowSceneNode[0]) { - currentExtraDescriptor |= (uint32_t)ExtraShaderDescriptors::InWorld; + if (permutationData != permutationDataPrevious) { + permutationCB->Update(permutationData); + permutationDataPrevious = permutationData; + } + + if (currentShader && updateShader) { + if (currentShader->shaderType.get() == RE::BSShader::Type::Utility) { + if (currentPixelDescriptor & static_cast(SIE::ShaderCache::UtilityShaderFlags::RenderShadowmask)) { + deferred->CopyShadowData(); } } } - if (deferred->inReflections) - currentExtraDescriptor |= (uint32_t)ExtraShaderDescriptors::IsReflections; + if (globals::menu->overlayVisible && globals::features::performanceOverlay->loaded && globals::features::performanceOverlay->IsOverlayVisible()) + Debug(); - if (deferred->inDecals) - currentExtraDescriptor |= (uint32_t)ExtraShaderDescriptors::IsDecal; - - if (isTree) - currentExtraDescriptor |= (uint32_t)ExtraShaderDescriptors::IsTree; - - if (forceUpdatePermutationBuffer || currentPixelDescriptor != lastPixelDescriptor || currentExtraDescriptor != lastExtraDescriptor || currentExtraFeatureDescriptor != lastExtraFeatureDescriptor) { - PermutationCB data{}; - data.VertexShaderDescriptor = currentVertexDescriptor; - data.PixelShaderDescriptor = currentPixelDescriptor; - data.ExtraShaderDescriptor = currentExtraDescriptor; - data.ExtraFeatureDescriptor = currentExtraFeatureDescriptor; - - permutationCB->Update(data); + updateShader = false; + } +} - lastVertexDescriptor = currentVertexDescriptor; - lastPixelDescriptor = currentPixelDescriptor; - lastExtraDescriptor = currentExtraDescriptor; - lastExtraFeatureDescriptor = currentExtraFeatureDescriptor; +void 
State::Debug() +{ + auto lock = Lock(); - forceUpdatePermutationBuffer = false; + if (frameChecker.IsNewFrame()) { + // Smooth draw calls and frame times for all shader types + for (int i = 0; i < magic_enum::enum_integer(RE::BSShader::Type::Total) + 1; ++i) { + smoothDrawCalls[i] = smoothDrawCalls[i] * static_cast(0.95) + drawCalls[i] * static_cast(0.05); + smoothFrameTimePerType[i] = smoothFrameTimePerType[i] * static_cast(0.95) + frameTimePerType[i] * static_cast(0.05); } - - currentExtraDescriptor = 0; - currentExtraFeatureDescriptor = 0; - - if (frameChecker.IsNewFrame()) { - // Smooth draw calls and frame times for all shader types - for (int i = 0; i < magic_enum::enum_integer(RE::BSShader::Type::Total) + 1; ++i) { - smoothDrawCalls[i] = smoothDrawCalls[i] * static_cast(0.95) + drawCalls[i] * static_cast(0.05); - smoothFrameTimePerType[i] = smoothFrameTimePerType[i] * static_cast(0.95) + frameTimePerType[i] * static_cast(0.05); - } - // Reset counters for next frame - for (auto& c : drawCalls) - c = 0; - for (auto& ft : frameTimePerType) - ft = 0.0f; - - // Start timing for this frame - if (frameTimingFrequency.QuadPart == 0) { - QueryPerformanceFrequency(&frameTimingFrequency); - } - QueryPerformanceCounter(&frameStartTime); - frameTimingActive = true; - - ID3D11Buffer* buffers[3] = { permutationCB->CB(), sharedDataCB->CB(), featureDataCB->CB() }; - context->PSSetConstantBuffers(4, 3, buffers); - context->CSSetConstantBuffers(5, 2, buffers + 1); + // Reset counters for next frame + for (auto& c : drawCalls) + c = 0; + for (auto& ft : frameTimePerType) + ft = 0.0f; + + // Start timing for this frame + if (frameTimingFrequency.QuadPart == 0) { + QueryPerformanceFrequency(&frameTimingFrequency); } + QueryPerformanceCounter(&frameStartTime); + frameTimingActive = true; + } - // Track time for current shader type if timing is active - if (frameTimingActive && currentShader) { - LARGE_INTEGER currentTime; - QueryPerformanceCounter(&currentTime); - - // Calculate 
elapsed time in milliseconds - float elapsed = (currentTime.QuadPart - frameStartTime.QuadPart) * 1000.0f / frameTimingFrequency.QuadPart; - - // Add elapsed time to the current shader type - frameTimePerType[magic_enum::enum_integer(currentShader->shaderType.get())] += elapsed; - frameTimePerType[magic_enum::enum_integer(RE::BSShader::Type::Total)] += elapsed; + // Track time for current shader type if timing is active + if (frameTimingActive && currentShader) { + LARGE_INTEGER currentTime; + QueryPerformanceCounter(&currentTime); - // Update start time for next measurement - frameStartTime = currentTime; - } + // Calculate elapsed time in milliseconds + float elapsed = (currentTime.QuadPart - frameStartTime.QuadPart) * 1000.0f / frameTimingFrequency.QuadPart; - if (currentShader) { - drawCalls[magic_enum::enum_integer(currentShader->shaderType.get())]++; - drawCalls[magic_enum::enum_integer(RE::BSShader::Type::Total)]++; - } + // Add elapsed time to the current shader type + frameTimePerType[magic_enum::enum_integer(currentShader->shaderType.get())] += elapsed; + frameTimePerType[magic_enum::enum_integer(RE::BSShader::Type::Total)] += elapsed; - if (currentShader && updateShader) { - auto type = magic_enum::enum_integer(currentShader->shaderType.get()); - if (type == magic_enum::enum_integer(RE::BSShader::Type::Utility)) { - if (currentPixelDescriptor & magic_enum::enum_integer(SIE::ShaderCache::UtilityShaderFlags::RenderShadowmask)) { - deferred->CopyShadowData(); - } - } + // Update start time for next measurement + frameStartTime = currentTime; + } - if (type > 0 && type < magic_enum::enum_integer(RE::BSShader::Type::Total)) { - if (enabledClasses[type - 1]) { - // Only check against non-shader bits - currentPixelDescriptor &= ~modifiedPixelDescriptor; + if (currentShader) { + drawCalls[magic_enum::enum_integer(currentShader->shaderType.get())]++; + drawCalls[magic_enum::enum_integer(RE::BSShader::Type::Total)]++; + } - if (frameAnnotations) { - 
BeginPerfEvent(std::format("Draw: CS {}::{:x}::{}", magic_enum::enum_name(currentShader->shaderType.get()), currentPixelDescriptor, currentShader->fxpFilename)); - SetPerfMarker(std::format("Defines: {}", SIE::ShaderCache::GetDefinesString(*currentShader, currentPixelDescriptor))); - EndPerfEvent(); - } - } - } - } - updateShader = false; + if (currentShader && updateShader && frameAnnotations) { + BeginPerfEvent(std::format("Draw: CS {}::{:x}::{}", magic_enum::enum_name(currentShader->shaderType.get()), permutationData.PixelShaderDescriptor, currentShader->fxpFilename)); + SetPerfMarker(std::format("Defines: {}", SIE::ShaderCache::GetDefinesString(*currentShader, permutationData.PixelShaderDescriptor))); + EndPerfEvent(); } } @@ -160,7 +123,7 @@ void State::Reset() lastPixelDescriptor = 0; lastVertexDescriptor = 0; initialized = false; - forceUpdatePermutationBuffer = true; + std::memset(&permutationDataPrevious, 0xFF, sizeof(PermutationCB)); frameCount++; } diff --git a/src/State.h b/src/State.h index 88ed451e52..9b62847e75 100644 --- a/src/State.h +++ b/src/State.h @@ -81,6 +81,7 @@ class State }; void Draw(); + void Debug(); void Reset(); void Setup(); @@ -144,13 +145,8 @@ class State uint modifiedPixelDescriptor = 0; uint lastModifiedVertexDescriptor = 0; uint lastModifiedPixelDescriptor = 0; - uint currentExtraDescriptor = 0; uint lastExtraDescriptor = 0; - uint currentExtraFeatureDescriptor = 0; uint lastExtraFeatureDescriptor = 0; - bool forceUpdatePermutationBuffer = true; - - bool isTree = false; enum class ExtraShaderDescriptors : uint32_t { @@ -158,8 +154,7 @@ class State IsReflections = 1 << 1, IsBeastRace = 1 << 2, EffectShadows = 1 << 3, - IsDecal = 1 << 4, - IsTree = 1 << 5 + IsTree = 1 << 4 }; enum class ExtraFeatureDescriptors : uint32_t @@ -177,12 +172,19 @@ class State void UpdateSharedData(bool a_inWorld, bool a_prepass); - struct alignas(16) PermutationCB + struct PermutationCB { uint VertexShaderDescriptor; uint PixelShaderDescriptor; uint 
ExtraShaderDescriptor; uint ExtraFeatureDescriptor; + + bool operator==(const PermutationCB& other) const + { + return PixelShaderDescriptor == other.PixelShaderDescriptor && + ExtraShaderDescriptor == other.ExtraShaderDescriptor && + ExtraFeatureDescriptor == other.ExtraFeatureDescriptor; + } }; ConstantBuffer* permutationCB = nullptr; @@ -208,6 +210,9 @@ class State ConstantBuffer* sharedDataCB = nullptr; ConstantBuffer* featureDataCB = nullptr; + PermutationCB permutationData{}; + PermutationCB permutationDataPrevious{}; + Util::FrameChecker frameChecker; uint frameCount = 0; diff --git a/src/TruePBR.cpp b/src/TruePBR.cpp index 25d0178b44..5e525212c8 100644 --- a/src/TruePBR.cpp +++ b/src/TruePBR.cpp @@ -1213,25 +1213,14 @@ struct TESForm_SetFormEditorID static inline REL::Relocation func; }; -struct SetPerFrameBuffers -{ - static void thunk(void* renderer) - { - func(renderer); - auto* singleton = globals::truePBR; - singleton->SetupFrame(); - } - static inline REL::Relocation func; -}; - struct BSTempEffectSimpleDecal_SetupGeometry { static void thunk(RE::BSTempEffectSimpleDecal* decal, RE::BSGeometry* geometry, RE::BGSTextureSet* textureSet, bool blended) { func(decal, geometry, textureSet, blended); auto* singleton = globals::truePBR; - - if (auto* shaderProperty = netimmerse_cast(geometry->GetGeometryRuntimeData().properties[1].get()); + auto unknownProperty = geometry->GetGeometryRuntimeData().properties[1].get(); + if (auto shaderProperty = unknownProperty->GetRTTI() == globals::rtti::BSLightingShaderPropertyRTTI.get() ? 
static_cast(unknownProperty) : nullptr; shaderProperty != nullptr && singleton->IsPBRTextureSet(textureSet)) { { BSLightingShaderMaterialPBR srcMaterial; @@ -1562,9 +1551,6 @@ void TruePBR::PostPostLoad() stl::write_vfunc<0x32, TESForm_GetFormEditorID>(RE::VTABLE_TESWeather[0]); stl::write_vfunc<0x33, TESForm_SetFormEditorID>(RE::VTABLE_TESWeather[0]); - logger::info("Hooking SetPerFrameBuffers"); - stl::detour_thunk(REL::RelocationID(75570, 77371)); - logger::info("Hooking BSTempEffectSimpleDecal"); stl::detour_thunk(REL::RelocationID(29253, 30108)); @@ -1604,10 +1590,35 @@ void TruePBR::SetupDefaultPBRLandTextureSet() void TruePBR::SetShaderResouces(ID3D11DeviceContext* a_context) { - for (uint32_t textureIndex = 0; textureIndex < ExtendedRendererState::NumPSTextures; ++textureIndex) { - if (extendedRendererState.PSResourceModifiedBits & (1 << textureIndex)) { - a_context->PSSetShaderResources(ExtendedRendererState::FirstPSTexture + textureIndex, 1, &extendedRendererState.PSTexture[textureIndex]); - } + uint32_t mask = extendedRendererState.PSResourceModifiedBits; + + if (mask == 0) [[likely]] { + // No dirty slots, early exit + return; + } + + constexpr uint32_t firstTexture = ExtendedRendererState::FirstPSTexture; + auto& textures = extendedRendererState.PSTexture; + + while (mask) { + // Find index of the least significant set bit + uint32_t batchStart = std::countr_zero(mask); + + // Check for consecutive set bits and batch them + uint32_t shiftedMask = mask >> batchStart; + uint32_t batchCount = std::countr_one(shiftedMask); + + // Issue one API call for this batch + a_context->PSSetShaderResources( + firstTexture + batchStart, + batchCount, + &textures[batchStart]); + + // Clear the bits we just processed + uint32_t clearMask = ((1u << batchCount) - 1u) << batchStart; + mask &= ~clearMask; } + + // Reset modified bits extendedRendererState.PSResourceModifiedBits = 0; -} +} \ No newline at end of file diff --git a/src/XSEPlugin.cpp b/src/XSEPlugin.cpp index 
3a8aed640a..9689b31bff 100644 --- a/src/XSEPlugin.cpp +++ b/src/XSEPlugin.cpp @@ -154,7 +154,7 @@ bool Load() } if (REL::Module::IsVR()) { - REL::IDDatabase::get().IsVRAddressLibraryAtLeastVersion("0.181.0", true); + REL::IDDatabase::get().IsVRAddressLibraryAtLeastVersion("0.182.0", true); } auto privateProfileRedirectorVersion = Util::GetDllVersion(L"Data/SKSE/Plugins/PrivateProfileRedirector.dll");