diff --git a/src/Menu/AdvancedSettingsRenderer.cpp b/src/Menu/AdvancedSettingsRenderer.cpp index 85310a0ec4..03695094c5 100644 --- a/src/Menu/AdvancedSettingsRenderer.cpp +++ b/src/Menu/AdvancedSettingsRenderer.cpp @@ -659,6 +659,43 @@ void AdvancedSettingsRenderer::RenderDeveloperSection() ImGui::Text("Enable detailed frame annotations for debugging render passes and draw calls."); } + // Half-precision (partial precision) shader compile flag + bool partialPrecision = globals::state->enablePartialPrecision.load(std::memory_order_relaxed); + if (ImGui::Checkbox("Half Precision (Partial Precision)", &partialPrecision)) { + globals::state->enablePartialPrecision.store(partialPrecision, std::memory_order_relaxed); + // Force a recompile so the flag actually takes effect on subsequent shader builds. + globals::shaderCache->Clear(); + } + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text( + "Adds D3DCOMPILE_PARTIAL_PRECISION to the shader compiler flags.\n" + "Lets fxc downgrade unmarked float ops to FP16 where it can prove safety, " + "on top of the existing min16float type hints.\n" + "On FP16-capable GPUs (Pascal+ / GCN+ / Skylake+) this can halve register " + "pressure and double ALU throughput, but it can also introduce minor visual " + "differences in shaders that haven't been audited for precision sensitivity.\n" + "Toggling this clears the shader cache and triggers a full recompile."); + } + + // Avoid flow control compiler flag (transient — not saved to config because the + // right setting depends on the current scene, not the user). + bool avoidFlowControl = globals::state->enableAvoidFlowControl.load(std::memory_order_relaxed); + if (ImGui::Checkbox("Avoid Flow Control", &avoidFlowControl)) { + globals::state->enableAvoidFlowControl.store(avoidFlowControl, std::memory_order_relaxed); + // Force a recompile so the flag actually takes effect on subsequent shader builds. 
+ globals::shaderCache->Clear(); + } + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text( + "Adds D3DCOMPILE_AVOID_FLOW_CONTROL to the shader compiler flags.\n" + "Forces fxc to flatten branches into predicated ops rather than emitting " + "dynamic flow control. Often a win for short branch bodies and uniformly-" + "taken branches; usually a loss for long divergent branches that vanilla " + "flow control would skip entirely.\n" + "Resets every launch. Toggling this clears the shader cache and triggers a " + "full recompile."); + } + ImGui::Spacing(); ImGui::Separator(); ImGui::Spacing(); diff --git a/src/State.cpp b/src/State.cpp index c0ba1574f4..91c3e93256 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -437,6 +437,7 @@ void State::SaveToJson(nlohmann::json& settings) advanced["Background Compiler Threads"] = shaderCache->backgroundCompilationThreadCount; advanced["Use FileWatcher"] = shaderCache->UseFileWatcher(); advanced["Frame Annotations"] = frameAnnotations; + advanced["Partial Precision"] = enablePartialPrecision.load(std::memory_order_relaxed); settings["Advanced"] = advanced; json general; @@ -511,6 +512,8 @@ void State::LoadFromJson(nlohmann::json& settings) shaderCache->SetFileWatcher(advanced["Use FileWatcher"]); if (advanced.contains("Frame Annotations") && advanced["Frame Annotations"].is_boolean()) frameAnnotations = advanced["Frame Annotations"]; + if (advanced.contains("Partial Precision") && advanced["Partial Precision"].is_boolean()) + enablePartialPrecision.store(advanced["Partial Precision"].get<bool>(), std::memory_order_relaxed); } if (settings.contains("General") && settings["General"].is_object()) { diff --git a/src/State.h b/src/State.h index f7877166d5..16018dda7f 100644 --- a/src/State.h +++ b/src/State.h @@ -5,6 +5,7 @@ #include #include +#include <atomic> #include #include @@ -141,6 +142,21 @@ class State bool frameAnnotations = false; + // Pass D3DCOMPILE_PARTIAL_PRECISION to fxc. 
With explicit min16float types this is + // mostly belt-and-braces in SM5, but it lets the compiler downgrade unmarked float + // ops to FP16 where it can prove safety. Off by default; leave it off when reversing + // shaders or chasing a precision bug. + // Atomic: written from the UI thread, read from compilation pool workers. + std::atomic_bool enablePartialPrecision{ false }; + + // Pass D3DCOMPILE_AVOID_FLOW_CONTROL to fxc. Forces the compiler to flatten branches + // into predicated ops instead of using dynamic flow control. Can win on uniform-branch + // or short-body branches; can lose on long divergent branches that vanilla flow + // control would skip. Transient (session-only); not saved to config because the + // right setting depends on the current scene/work, not the user. + // Atomic: written from the UI thread, read from compilation pool workers. + std::atomic_bool enableAvoidFlowControl{ false }; + uint lastVertexDescriptor = 0; uint lastPixelDescriptor = 0; uint modifiedVertexDescriptor = 0; diff --git a/src/Utils/D3D.cpp b/src/Utils/D3D.cpp index 042d2718b4..202e78467e 100644 --- a/src/Utils/D3D.cpp +++ b/src/Utils/D3D.cpp @@ -1,6 +1,7 @@ #include "D3D.h" #include "Features/TerrainBlending.h" +#include "ShaderCache.h" #include "State.h" #include "Utils/Format.h" #include @@ -193,6 +194,15 @@ namespace Util // Compiler setup uint32_t flags = !globals::state->IsDeveloperMode() ? (D3DCOMPILE_ENABLE_STRICTNESS | D3DCOMPILE_OPTIMIZATION_LEVEL3) : D3DCOMPILE_DEBUG; + if (globals::state->enablePartialPrecision.load(std::memory_order_relaxed)) + flags |= D3DCOMPILE_PARTIAL_PRECISION; + if (globals::state->enableAvoidFlowControl.load(std::memory_order_relaxed)) + flags |= D3DCOMPILE_AVOID_FLOW_CONTROL; + // Disk cache on = user is running shipped, known-good shaders — skip the fxc + // validation pass to trim compile time. Disk cache off = dev workflow, keep + // validation so malformed source produces a clean error instead of UB. 
+ if (globals::shaderCache->IsDiskCache()) + flags |= D3DCOMPILE_SKIP_VALIDATION; ID3DBlob* shaderBlob; ID3DBlob* shaderErrors;