diff --git a/features/Upscaling/Shaders/Upscaling/RCAS/RCAS.hlsl b/features/Upscaling/Shaders/Upscaling/RCAS/RCAS.hlsl new file mode 100644 index 0000000000..50587525b2 --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/RCAS/RCAS.hlsl @@ -0,0 +1,116 @@ +// FidelityFX Super Resolution - Robust Contrast Adaptive Sharpening (RCAS) +// Based on https://github.com/GPUOpen-Effects/FidelityFX-FSR/blob/master/ffx-fsr/ffx_fsr1.h +// +// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#define FSR_RCAS_LIMIT (0.25 - (1.0 / 16.0))
+
+cbuffer RCASConfig : register(b0)
+{
+    float sharpness;  // Linear scale applied to the sharpening lobe; clamped to [0, 1] where it is used.
+    float3 pad;       // Pads the buffer to 16 bytes.
+};
+
+Texture2D Source : register(t0);
+RWTexture2D Dest : register(u0);
+
+[numthreads(8, 8, 1)] void main(uint3 DTid : SV_DispatchThreadID)  // One thread per Dest texel: sharpens Source into Dest.
+{
+    uint2 texDim;
+    Dest.GetDimensions(texDim.x, texDim.y);
+
+    if (DTid.x >= texDim.x || DTid.y >= texDim.y)  // The dispatch is rounded up to 8x8 groups; drop the overhang.
+        return;
+
+    // Algorithm uses minimal 3x3 pixel neighborhood.
+    //   b
+    // d e f
+    //   h
+    int2 sp = int2(DTid.xy);
+    float3 b = Source.Load(int3(sp + int2(0, -1), 0)).rgb;
+    float3 d = Source.Load(int3(sp + int2(-1, 0), 0)).rgb;
+    float4 e = Source.Load(int3(sp, 0));  // Centre texel; alpha is kept so it can be passed through unchanged.
+    float3 f = Source.Load(int3(sp + int2(1, 0), 0)).rgb;
+    float3 h = Source.Load(int3(sp + int2(0, 1), 0)).rgb;
+
+    // Rename (32-bit) or regroup (16-bit).
+    float bR = b.r;
+    float bG = b.g;
+    float bB = b.b;
+    float dR = d.r;
+    float dG = d.g;
+    float dB = d.b;
+    float eR = e.r;
+    float eG = e.g;
+    float eB = e.b;
+    float fR = f.r;
+    float fG = f.g;
+    float fB = f.b;
+    float hR = h.r;
+    float hG = h.g;
+    float hB = h.b;
+
+    // Luma times 2.
+    float bL = bB * 0.5 + (bR * 0.5 + bG);
+    float dL = dB * 0.5 + (dR * 0.5 + dG);
+    float eL = eB * 0.5 + (eR * 0.5 + eG);
+    float fL = fB * 0.5 + (fR * 0.5 + fG);
+    float hL = hB * 0.5 + (hR * 0.5 + hG);
+
+    // Noise detection.
+    float nz = 0.25 * bL + 0.25 * dL + 0.25 * fL + 0.25 * hL - eL;
+    nz = saturate(abs(nz) * rcp(max(max(max(bL, dL), max(eL, fL)), hL) - min(min(min(bL, dL), min(eL, fL)), hL)));
+    nz = -0.5 * nz + 1.0;  // Remapped to [0.5, 1]: the more the centre deviates from the ring average, the less it is sharpened.
+
+    // Min and max of ring.
+    float mn4R = min(min(min(bR, dR), fR), hR);
+    float mn4G = min(min(min(bG, dG), fG), hG);
+    float mn4B = min(min(min(bB, dB), fB), hB);
+    float mx4R = max(max(max(bR, dR), fR), hR);
+    float mx4G = max(max(max(bG, dG), fG), hG);
+    float mx4B = max(max(max(bB, dB), fB), hB);
+
+    // Immediate constants for peak range.
+    float2 peakC = float2(1.0, -1.0 * 4.0);  // NOTE(review): assumes a signal peak of 1.0; confirm this is intended for HDR input.
+
+    // Limiters, these need to be high precision RCPs.
+    float hitMinR = min(mn4R, eR) * rcp(4.0 * mx4R);
+    float hitMinG = min(mn4G, eG) * rcp(4.0 * mx4G);
+    float hitMinB = min(mn4B, eB) * rcp(4.0 * mx4B);
+    float hitMaxR = (peakC.x - max(mx4R, eR)) * rcp(4.0 * mn4R + peakC.y);
+    float hitMaxG = (peakC.x - max(mx4G, eG)) * rcp(4.0 * mn4G + peakC.y);
+    float hitMaxB = (peakC.x - max(mx4B, eB)) * rcp(4.0 * mn4B + peakC.y);
+    float lobeR = max(-hitMinR, hitMaxR);
+    float lobeG = max(-hitMinG, hitMaxG);
+    float lobeB = max(-hitMinB, hitMaxB);
+    float lobe = max(-FSR_RCAS_LIMIT, min(max(lobeR, max(lobeG, lobeB)), 0.0)) * saturate(sharpness);  // saturate keeps 4 * lobe + 1 >= 0.25, so the resolve rcp below stays finite.
+
+    // Apply noise removal.
+    lobe *= nz;
+
+    // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+    float rcpL = rcp(4.0 * lobe + 1.0);
+    float pixR = (lobe * bR + lobe * dR + lobe * hR + lobe * fR + eR) * rcpL;
+    float pixG = (lobe * bG + lobe * dG + lobe * hG + lobe * fG + eG) * rcpL;
+    float pixB = (lobe * bB + lobe * dB + lobe * hB + lobe * fB + eB) * rcpL;
+
+    Dest[DTid.xy] = float4(pixR, pixG, pixB, e.a);  // Alpha is passed through rather than forced to 1.0.
+}
diff --git a/src/Features/Upscaling.cpp b/src/Features/Upscaling.cpp index 140564eca4..6cd4ab770f 100644 --- a/src/Features/Upscaling.cpp +++ b/src/Features/Upscaling.cpp @@ -497,14 +497,25 @@ void Upscaling::CreateUpscalingTextureResources(UpscaleMethod a_upscalemethod) motionVectorCopyTexture->CreateUAV(uavDesc); } - if (!nisSharpenerTexture) { - texDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + // RCAS sharpener texture - matches kMAIN format for HDR sharpening + if (!sharpenerTexture) { + main.texture->GetDesc(&texDesc); + main.SRV->GetDesc(&srvDesc); + + texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + srvDesc.Format = texDesc.Format; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MostDetailedMip = 0; + srvDesc.Texture2D.MipLevels = 1; + uavDesc.Format = texDesc.Format; + uavDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D; + uavDesc.Texture2D.MipSlice = 0; -
nisSharpenerTexture = new Texture2D(texDesc); - nisSharpenerTexture->CreateSRV(srvDesc); - nisSharpenerTexture->CreateUAV(uavDesc); + sharpenerTexture = new Texture2D(texDesc); + sharpenerTexture->CreateSRV(srvDesc); + sharpenerTexture->CreateUAV(uavDesc); } } } @@ -545,13 +556,13 @@ void Upscaling::DestroyUpscalingTextureResources(UpscaleMethod a_upscalemethod) delete motionVectorCopyTexture; motionVectorCopyTexture = nullptr; } - if (nisSharpenerTexture) { - nisSharpenerTexture->srv = nullptr; - nisSharpenerTexture->uav = nullptr; - nisSharpenerTexture->resource = nullptr; + if (sharpenerTexture) { + sharpenerTexture->srv = nullptr; + sharpenerTexture->uav = nullptr; + sharpenerTexture->resource = nullptr; - delete nisSharpenerTexture; - nisSharpenerTexture = nullptr; + delete sharpenerTexture; + sharpenerTexture = nullptr; } } } @@ -877,6 +888,8 @@ void Upscaling::SetupResources() CheckResources(GetUpscaleMethod()); + rcas.Initialize(); + if (d3d12SwapChainActive) dx12SwapChain.CreateSharedResources();
@@ -1407,32 +1420,35 @@ void Upscaling::UpscaleDepth()
     }
 }
 
-void Upscaling::ApplyNISSharpening()
+void Upscaling::ApplySharpening()
 {
-    if (!streamline.featureNIS || settings.sharpnessDLSS <= 0.0f) {
+    if (settings.sharpnessDLSS <= 0.0f)
         return;
-    }
 
-    auto context = globals::d3d::context;
+    if (!sharpenerTexture)
+        return;
 
-    ID3D11RenderTargetView* renderTarget = nullptr;
-    context->OMGetRenderTargets(1, &renderTarget, nullptr);
+    float currentSharpness = (-2.0f * settings.sharpnessDLSS) + 2.0f;  // 2 - 2s: reaches 0 at s = 1 and approaches 2 as s -> 0.
+    currentSharpness = exp2(-currentSharpness);                        // Linear scale for the shader: 1.0 at s = 1, 0.25 as s -> 0.
 
-    winrt::com_ptr mainResource;
-    renderTarget->GetResource(mainResource.put());
+    auto context = globals::d3d::context;
+    auto renderer = globals::game::renderer;
+    auto& main = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN];
 
-    context->OMSetRenderTargets(0, nullptr, nullptr);  // Unbind all bound render targets
+    ID3D11Resource* mainResource = nullptr;
+    main.SRV->GetResource(&mainResource);  // GetResource adds a reference; it is released below.
 
-    context->CopyResource(nisSharpenerTexture->resource.get(), mainResource.get());
+    if (!mainResource)
+        return;
 
-    streamline.ApplyNISSharpening(nisSharpenerTexture->resource.get(), settings.sharpnessDLSS);
+    context->OMSetRenderTargets(0, nullptr, nullptr);  // Unbind all bound render targets
 
-    context->CopyResource(mainResource.get(), nisSharpenerTexture->resource.get());
+    if (rcas.ApplySharpen(main.SRV, sharpenerTexture->uav.get(), currentSharpness))  // Copy back only when the pass ran; otherwise sharpenerTexture holds stale data.
+        context->CopyResource(mainResource, sharpenerTexture->resource.get());
 
-    globals::game::stateUpdateFlags->set(RE::BSGraphics::ShaderFlags::DIRTY_RENDERTARGET);  // Run OMSetRenderTargets again
+    mainResource->Release();
 
-    if (renderTarget)
-        renderTarget->Release();
+    globals::game::stateUpdateFlags->set(RE::BSGraphics::ShaderFlags::DIRTY_RENDERTARGET);  // Run OMSetRenderTargets again
 }
 
 void Upscaling::Main_UpdateJitter::thunk(RE::BSGraphics::State* a_state)
@@ -1459,6 +1475,9 @@ void Upscaling::Main_PostProcessing::thunk(RE::ImageSpaceManager* a_this, uint32 if (upscaleMethod != UpscaleMethod::kNONE && upscaleMethod != UpscaleMethod::kTAA) upscaling.PerformUpscaling(); + if (upscaleMethod == UpscaleMethod::kDLSS) + upscaling.ApplySharpening(); + auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); @@ -1466,10 +1485,6 @@ void Upscaling::Main_PostProcessing::thunk(RE::ImageSpaceManager* a_this, uint32 func(a_this, a3, a_target, a_4, a_5); - if (upscaleMethod == UpscaleMethod::kDLSS) - upscaling.ApplyNISSharpening(); - - // Disable TAA in some menus BSImagespaceShaderISTemporalAA->taaEnabled = false; } @@ -1503,4 +1518,4 @@ void Upscaling::BSFaceGenManager_UpdatePendingCustomizationTextures::thunk() runtimeData.dynamicResolutionLock = 1; func(); runtimeData.dynamicResolutionLock = 0; -} \ No newline at end of file +} diff --git a/src/Features/Upscaling.h b/src/Features/Upscaling.h index c5056a68c1..933e78f2bb 100644 --- a/src/Features/Upscaling.h +++ b/src/Features/Upscaling.h @@ -3,6 +3,7 @@ #include
"Feature.h" #include "Upscaling/DX12SwapChain.h" #include "Upscaling/FidelityFX.h" +#include "Upscaling/RCAS/RCAS.h" #include "Upscaling/Streamline.h" #include #include @@ -55,7 +56,7 @@ struct Upscaling : Feature uint frameGenerationForceEnable = 0; uint streamlineLogLevel = 0; // 0=Off, 1=Default, 2=Verbose float sharpnessFSR = 1.0f; - float sharpnessDLSS = 0.1f; + float sharpnessDLSS = 1.0f; uint DLSSPreset = 2; // VR-specific DLSS preset: 0=F, 1=J, 2=K }; @@ -138,14 +139,15 @@ struct Upscaling : Feature Texture2D* reactiveMaskTexture = nullptr; Texture2D* transparencyCompositionMaskTexture = nullptr; Texture2D* motionVectorCopyTexture = nullptr; - Texture2D* nisSharpenerTexture = nullptr; + Texture2D* sharpenerTexture = nullptr; virtual void ClearShaderCache() override; // Static instances instead of singletons static inline Streamline streamline; - static inline FidelityFX fidelityFX; // Only for frame generation + static inline FidelityFX fidelityFX; ///< Only for frame generation static inline DX12SwapChain dx12SwapChain; + static inline RCAS rcas; ///< Standalone RCAS sharpening for DLSS winrt::com_ptr copyDepthToSharedBufferPS; @@ -162,7 +164,12 @@ struct Upscaling : Feature void PerformUpscaling(); void UpscaleDepth(); - void ApplyNISSharpening(); + /** + * @brief Applies RCAS sharpening to the main render target after DLSS upscaling. + * + * Runs in HDR space before tonemapping. Only called when DLSS is active and sharpness > 0.
+ */ + void ApplySharpening(); static void TimerSleepQPC(int64_t targetQPC);
diff --git a/src/Features/Upscaling/RCAS/RCAS.cpp b/src/Features/Upscaling/RCAS/RCAS.cpp
new file mode 100644
index 0000000000..f7af7ce2b3
--- /dev/null
+++ b/src/Features/Upscaling/RCAS/RCAS.cpp
@@ -0,0 +1,87 @@
+#include "RCAS.h"
+
+#include "../../../Deferred.h"
+#include "../../../State.h"
+#include "../../../Util.h"
+
+// CPU-side layout of the RCASConfig constant buffer that RCAS.hlsl reads from register b0.
+struct RCASConfig
+{
+    float sharpness;
+    float3 pad;
+};
+
+RCAS::~RCAS()
+{
+    delete rcasConfigCB;
+    rcasConfigCB = nullptr;
+}
+
+void RCAS::Initialize()
+{
+    if (rcasConfigCB)
+        return;
+
+    logger::info("[RCAS] Creating resources");
+    CreateComputeShader();
+    rcasConfigCB = new ConstantBuffer(ConstantBufferDesc());
+}
+
+void RCAS::CreateComputeShader()
+{
+    std::vector> defines;
+    rcasComputeShader.attach((ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\Upscaling\\RCAS\\RCAS.hlsl", defines, "cs_5_0"));
+}
+
+bool RCAS::ApplySharpen(ID3D11ShaderResourceView* inputSRV, ID3D11UnorderedAccessView* outputUAV, float sharpness)
+{
+    auto state = globals::state;
+    auto context = globals::d3d::context;
+
+    if (!rcasComputeShader) {
+        logger::warn("[RCAS] Compute shader not compiled");
+        return false;
+    }
+
+    // The code below dereferences the constant buffer and binds both views, so all three must exist.
+    if (!rcasConfigCB || !inputSRV || !outputUAV)
+        return false;
+
+    state->BeginPerfEvent("RCAS Sharpening");
+
+    uint32_t screenWidth = (uint32_t)state->screenSize.x;
+    uint32_t screenHeight = (uint32_t)state->screenSize.y;
+
+    RCASConfig config{};
+    config.sharpness = sharpness;
+
+    rcasConfigCB->Update(config);
+    auto bufferArray = rcasConfigCB->CB();
+
+    context->CSSetShader(rcasComputeShader.get(), nullptr, 0);
+    context->CSSetConstantBuffers(0, 1, &bufferArray);
+
+    ID3D11ShaderResourceView* srvs[] = { inputSRV };
+    context->CSSetShaderResources(0, 1, srvs);
+
+    ID3D11UnorderedAccessView* uavs[] = { outputUAV };
+    context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr);
+
+    // One 8x8 thread group per tile, rounded up; the shader returns early for threads outside the destination.
+    uint32_t dispatchX = (screenWidth + 7) / 8;
+    uint32_t dispatchY = (screenHeight + 7) / 8;
+    context->Dispatch(dispatchX, dispatchY, 1);
+
+    // Unbind the views and the shader that were set above.
+    ID3D11ShaderResourceView* nullSRVs[] = { nullptr };
+    context->CSSetShaderResources(0, 1, nullSRVs);
+
+    ID3D11UnorderedAccessView* nullUAVs[] = { nullptr };
+    context->CSSetUnorderedAccessViews(0, 1, nullUAVs, nullptr);
+
+    context->CSSetShader(nullptr, nullptr, 0);
+
+    state->EndPerfEvent();
+
+    return true;
+}
diff --git a/src/Features/Upscaling/RCAS/RCAS.h b/src/Features/Upscaling/RCAS/RCAS.h
new file mode 100644
index 0000000000..c5b42ae251
--- /dev/null
+++ b/src/Features/Upscaling/RCAS/RCAS.h
@@ -0,0 +1,48 @@
+#pragma once
+
+#include "../../../Buffer.h"
+#include "../../../State.h"
+
+#include
+#include
+
+/**
+ * @brief Robust Contrast Adaptive Sharpening (RCAS) implementation.
+ *
+ * Standalone sharpening pass based on AMD FidelityFX FSR1 RCAS algorithm.
+ * Used to apply sharpening to DLSS output in HDR space before tonemapping.
+ */
+class RCAS
+{
+public:
+    RCAS() = default;
+    ~RCAS();
+
+    // The constant buffer is owned through a raw pointer that the destructor deletes, so copying is forbidden.
+    RCAS(const RCAS&) = delete;
+    RCAS& operator=(const RCAS&) = delete;
+
+    /**
+     * @brief Initializes RCAS resources including compute shader and constant buffer.
+     *
+     * Safe to call multiple times - will early-out if already initialized.
+     */
+    void Initialize();
+
+    /**
+     * @brief Applies RCAS sharpening to the input texture.
+     *
+     * @param inputTexture SRV of the texture to sharpen (typically kMAIN render target).
+     * @param outputUAV UAV to write sharpened result to.
+     * @param sharpness Sharpening strength (0.0 = no sharpening, higher = more sharp).
+     * @return true if the compute pass was dispatched; false if the shader, the constant buffer or either view
+     *         is missing, in which case nothing is bound or dispatched and @p outputUAV is not written.
+     */
+    bool ApplySharpen(ID3D11ShaderResourceView* inputTexture, ID3D11UnorderedAccessView* outputUAV, float sharpness);
+
+private:
+    void CreateComputeShader();
+
+    winrt::com_ptr rcasComputeShader;         // Compiled from RCAS.hlsl by CreateComputeShader(); null if that failed.
+    ConstantBuffer* rcasConfigCB = nullptr;  // Created in Initialize(), deleted in the destructor.
+};
diff --git a/src/Features/Upscaling/Streamline.cpp b/src/Features/Upscaling/Streamline.cpp index e3ab84ef4a..88eb7ccf75 100644 --- a/src/Features/Upscaling/Streamline.cpp +++ b/src/Features/Upscaling/Streamline.cpp @@ -102,8 +102,8 @@ void Streamline::LoadInterposer() sl::Preferences pref; - sl::Feature featuresToLoad[] = { sl::kFeatureDLSS, sl::kFeatureNIS }; - sl::Feature featuresToLoadVR[] = { sl::kFeatureDLSS, sl::kFeatureNIS }; + sl::Feature featuresToLoad[] = { sl::kFeatureDLSS }; + sl::Feature featuresToLoadVR[] = { sl::kFeatureDLSS }; pref.featuresToLoad = REL::Module::IsVR() ? featuresToLoadVR : featuresToLoad; pref.numFeaturesToLoad = REL::Module::IsVR() ? _countof(featuresToLoadVR) : _countof(featuresToLoad); @@ -181,21 +181,7 @@ void Streamline::CheckFeatures(IDXGIAdapter* a_adapter) } } - slIsFeatureLoaded(sl::kFeatureNIS, featureNIS); - if (featureNIS) { - logger::info("[Streamline] NIS feature is loaded"); - featureNIS = slIsFeatureSupported(sl::kFeatureNIS, adapterInfo) == sl::Result::eOk; - } else { - logger::info("[Streamline] NIS feature is not loaded"); - sl::FeatureRequirements featureRequirements; - sl::Result result = slGetFeatureRequirements(sl::kFeatureNIS, featureRequirements); - if (result != sl::Result::eOk) { - logger::info("[Streamline] NIS feature failed to load due to: {}", magic_enum::enum_name(result)); - } - } - logger::info("[Streamline] DLSS {} available", featureDLSS ? "is" : "is not"); - logger::info("[Streamline] NIS {} available", featureNIS ?
"is" : "is not"); } void Streamline::PostDevice() @@ -207,11 +193,6 @@ void Streamline::PostDevice() slGetFeatureFunction(sl::kFeatureDLSS, "slDLSSGetState", (void*&)slDLSSGetState); slGetFeatureFunction(sl::kFeatureDLSS, "slDLSSSetOptions", (void*&)slDLSSSetOptions); } - - if (featureNIS) { - slGetFeatureFunction(sl::kFeatureNIS, "slNISSetOptions", (void*&)slNISSetOptions); - slGetFeatureFunction(sl::kFeatureNIS, "slNISGetState", (void*&)slNISGetState); - } } /** @@ -435,41 +416,3 @@ void Streamline::DestroyDLSSResources() slDLSSSetOptions(viewport, dlssOptions); slFreeResources(sl::kFeatureDLSS, viewport); } - -void Streamline::ApplyNISSharpening(ID3D11Resource* a_texture, float sharpness) -{ - if (!featureNIS) { - return; - } - - CheckFrameConstants(); - - sl::NISOptions nisOptions{}; - nisOptions.mode = sl::NISMode::eSharpen; - nisOptions.sharpness = std::clamp(sharpness, 0.0f, 1.0f); - nisOptions.hdrMode = sl::NISHDR::eNone; - - if (SL_FAILED(result, slNISSetOptions(viewport, nisOptions))) { - logger::error("[Streamline] Could not set NIS options"); - return; - } - - auto state = globals::state; - sl::Extent fullExtent{ 0, 0, (uint)state->screenSize.x, (uint)state->screenSize.y }; - - sl::Resource colorIn = { sl::ResourceType::eTex2d, a_texture, 0 }; - sl::Resource colorOut = { sl::ResourceType::eTex2d, a_texture, 0 }; - - sl::ResourceTag colorInTag = sl::ResourceTag{ &colorIn, sl::kBufferTypeScalingInputColor, sl::ResourceLifecycle::eOnlyValidNow, &fullExtent }; - sl::ResourceTag colorOutTag = sl::ResourceTag{ &colorOut, sl::kBufferTypeScalingOutputColor, sl::ResourceLifecycle::eOnlyValidNow, &fullExtent }; - - sl::ResourceTag resourceTags[] = { colorInTag, colorOutTag }; - - slSetTag(viewport, resourceTags, _countof(resourceTags), globals::d3d::context); - - sl::ViewportHandle view(viewport); - const sl::BaseStructure* inputs[] = { &view }; - if (SL_FAILED(result, slEvaluateFeature(sl::kFeatureNIS, *frameToken, inputs, _countof(inputs), 
globals::d3d::context))) { - logger::error("[Streamline] Failed to evaluate NIS feature"); - } -} \ No newline at end of file diff --git a/src/Features/Upscaling/Streamline.h b/src/Features/Upscaling/Streamline.h index d1a34184d9..1397e36505 100644 --- a/src/Features/Upscaling/Streamline.h +++ b/src/Features/Upscaling/Streamline.h @@ -14,7 +14,6 @@ #include #include #include -#include #include #pragma warning(pop) @@ -32,7 +31,6 @@ class Streamline bool triedInitialization = false; bool featureDLSS = false; - bool featureNIS = false; sl::ViewportHandle viewport{ 0 }; @@ -62,10 +60,6 @@ class Streamline PFun_slDLSSGetState* slDLSSGetState{}; PFun_slDLSSSetOptions* slDLSSSetOptions{}; - // NIS specific functions - PFun_slNISSetOptions* slNISSetOptions{}; - PFun_slNISGetState* slNISGetState{}; - Util::FrameChecker frameChecker; sl::FrameToken* frameToken = nullptr; @@ -87,6 +81,4 @@ class Streamline float2 GetInputResolutionScale(uint32_t outputWidth, uint32_t outputHeight, uint32_t qualityPreset); void DestroyDLSSResources(); - - void ApplyNISSharpening(ID3D11Resource* a_texture, float sharpness); };