Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[dxvk] Add low-latency frame pacing #4654

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 51 additions & 1 deletion dxvk.conf
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,51 @@
# dxgi.enableHDR = True


# Frame pacing mode managing CPU-GPU synchronization.
# Defaults to "low-latency" in the draft-PR for demonstration purposes.
#
# "max-frame-latency" provides stable latency in the GPU-limit as long as
# GPU render times are stable. Latency generally is higher but offers great
# visual smoothness.
#
# "low-latency" provides lower latency in the GPU-limit and can be fine-tuned
# via dxvk.lowLatencyOffset and dxvk.lowLatencyAllowCpuFramesOverlap.
#
# "min-latency" possibly provides the lowest latency (although "low-latency"
# can be quicker in some situations), but yields fewer fps in the GPU-limit
# because it stalls the GPU between frames. It is generally not recommended,
# but it helps to gain insights for fine-tuning the low-latency mode and
# may be useful for running games in the CPU-limit.
#
# The "low-latency" and "min-latency" modes also provide their own fps
# limiter, which is enabled via the usual frame rate limit options.
#
# Supported values: "max-frame-latency", "low-latency", "min-latency"

# dxvk.framePace = ""


# Allows fine-tuning the low-latency frame pacing mode.
# Positive values make a frame begin later, which might improve
# responsiveness. The effect is very slight, but it may be relevant
# in edge cases.
# Negative values make a frame begin earlier, which might improve fps.
# Values are given in microseconds. Defaults to 0.
#
# Supported values: -10000 to 10000

# dxvk.lowLatencyOffset = 0


# Determines whether a frame is allowed to begin before finishing processing
# the cpu-part of the previous one, when low-latency frame pacing is used.
# Snappiness may be improved when disallowing overlap. On the other hand, this
# might also decrease fps in certain cases. Defaults to True.
#
# Supported values: True, False

# dxvk.lowLatencyAllowCpuFramesOverlap = True


# Expose support for dcomp swap chains with a dummy window.
#
# This is not a valid implementation of DirectComposition swapchains,
Expand Down Expand Up @@ -104,8 +149,13 @@
# The implementation will either use VK_NV_low_latency2 if supported
# by the driver, or a custom algorithm.
# - False: Disable Reflex support as well as built-in latency reduction.
# This build defaults to False so that dxvk.framePace takes effect. Set
# this option to Auto to enable Reflex manually. This is necessary until
# switching back and forth between Reflex and the low-latency frame
# pacing is supported (for example via the in-game options). More
# importantly, low-latency frame pacing should be enabled by default
# for games that do not support Reflex.

# dxvk.latencySleep = Auto
# dxvk.latencySleep = False


# Tolerance for the latency sleep heuristic, in microseconds. Higher values
Expand Down
16 changes: 15 additions & 1 deletion src/d3d11/d3d11_swapchain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "d3d11_swapchain.h"

#include "../dxvk/dxvk_latency_builtin.h"
#include "../dxvk/framepacer/dxvk_framepacer.h"

#include "../util/util_win32_compat.h"

Expand Down Expand Up @@ -294,6 +295,9 @@ namespace dxvk {
if (m_latencyHud)
m_latencyHud->accumulateStats(latencyStats);

if (m_renderLatencyHud)
m_renderLatencyHud->updateLatencyTracker(m_latency);

return hr;
}

Expand Down Expand Up @@ -354,6 +358,10 @@ namespace dxvk {

if (m_presenter != nullptr)
m_presenter->setFrameRateLimit(m_targetFrameRate, GetActualFrameLatency());

FramePacer* framePacer = dynamic_cast<FramePacer*>(m_latency.ptr());
if (framePacer != nullptr)
framePacer->setTargetFrameRate(FrameRate);
}


Expand Down Expand Up @@ -599,8 +607,14 @@ namespace dxvk {
if (hud) {
hud->addItem<hud::HudClientApiItem>("api", 1, GetApiName());

if (m_latency)
if (m_latency) {
m_latencyHud = hud->addItem<hud::HudLatencyItem>("latency", 4);
FramePacer* framePacer = dynamic_cast<FramePacer*>(m_latency.ptr());
if (framePacer) {
int32_t fpsItemPos = hud->getItemPos<hud::HudFpsItem>();
m_renderLatencyHud = hud->addItem<hud::HudRenderLatencyItem>("renderlatency", fpsItemPos+1);
}
}
}

m_blitter = new DxvkSwapchainBlitter(m_device, std::move(hud));
Expand Down
3 changes: 2 additions & 1 deletion src/d3d11/d3d11_swapchain.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ namespace dxvk {
dxvk::mutex m_frameStatisticsLock;
DXGI_VK_FRAME_STATISTICS m_frameStatistics = { };

Rc<hud::HudLatencyItem> m_latencyHud;
Rc<hud::HudLatencyItem> m_latencyHud;
Rc<hud::HudRenderLatencyItem> m_renderLatencyHud;

Rc<DxvkImageView> GetBackBufferView();

Expand Down
16 changes: 15 additions & 1 deletion src/d3d9/d3d9_swapchain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include "d3d9_hud.h"
#include "d3d9_window.h"

#include "../dxvk/framepacer/dxvk_framepacer.h"

namespace dxvk {

static uint16_t MapGammaControlPoint(float x) {
Expand Down Expand Up @@ -923,6 +925,9 @@ namespace dxvk {
if (m_latencyHud)
m_latencyHud->accumulateStats(latencyStats);

if (m_renderLatencyHud)
m_renderLatencyHud->updateLatencyTracker(m_latencyTracker);

// Rotate swap chain buffers so that the back
// buffer at index 0 becomes the front buffer.
for (uint32_t i = 1; i < m_backBuffers.size(); i++)
Expand Down Expand Up @@ -1060,8 +1065,14 @@ namespace dxvk {
if (hud) {
m_apiHud = hud->addItem<hud::HudClientApiItem>("api", 1, GetApiName());

if (m_latencyTracking)
if (m_latencyTracking) {
m_latencyHud = hud->addItem<hud::HudLatencyItem>("latency", 4);
FramePacer* framePacer = dynamic_cast<FramePacer*>(m_latencyTracker.ptr());
if (framePacer) {
int32_t fpsItemPos = hud->getItemPos<hud::HudFpsItem>();
m_renderLatencyHud = hud->addItem<hud::HudRenderLatencyItem>("renderlatency", fpsItemPos+1);
}
}

hud->addItem<hud::HudSamplerCount>("samplers", -1, m_parent);
hud->addItem<hud::HudFixedFunctionShaders>("ffshaders", -1, m_parent);
Expand Down Expand Up @@ -1112,6 +1123,9 @@ namespace dxvk {
}

m_wctx->presenter->setFrameRateLimit(frameRate, GetActualFrameLatency());
FramePacer* framePacer = dynamic_cast<FramePacer*>(m_latencyTracker.ptr());
if (framePacer != nullptr)
framePacer->setTargetFrameRate(frameRate);
m_targetFrameRate = frameRate;
}

Expand Down
5 changes: 3 additions & 2 deletions src/d3d9/d3d9_swapchain.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,9 @@ namespace dxvk {
bool m_latencyTracking = false;
Rc<DxvkLatencyTracker> m_latencyTracker = nullptr;

Rc<hud::HudClientApiItem> m_apiHud;
Rc<hud::HudLatencyItem> m_latencyHud;
Rc<hud::HudClientApiItem> m_apiHud;
Rc<hud::HudLatencyItem> m_latencyHud;
Rc<hud::HudRenderLatencyItem> m_renderLatencyHud;

std::optional<VkHdrMetadataEXT> m_hdrMetadata;
bool m_unlockAdditionalFormats = false;
Expand Down
2 changes: 1 addition & 1 deletion src/dxvk/dxvk_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ namespace dxvk {
void DxvkContext::beginLatencyTracking(
const Rc<DxvkLatencyTracker>& tracker,
uint64_t frameId) {
if (tracker && (!m_latencyTracker || m_latencyTracker == tracker)) {
if (tracker && m_latencyTracker != tracker) {
tracker->notifyCsRenderBegin(frameId);

m_latencyTracker = tracker;
Expand Down
5 changes: 3 additions & 2 deletions src/dxvk/dxvk_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "dxvk_instance.h"
#include "dxvk_latency_builtin.h"
#include "dxvk_latency_reflex.h"
#include "framepacer/dxvk_framepacer.h"

namespace dxvk {

Expand Down Expand Up @@ -310,13 +311,13 @@ namespace dxvk {
Rc<DxvkLatencyTracker> DxvkDevice::createLatencyTracker(
const Rc<Presenter>& presenter) {
if (m_options.latencySleep == Tristate::False)
return nullptr;
return new FramePacer(m_options);

if (m_options.latencySleep == Tristate::Auto) {
if (m_features.nvLowLatency2)
return new DxvkReflexLatencyTrackerNv(presenter);
else
return nullptr;
return new FramePacer(m_options);
}

return new DxvkBuiltInLatencyTracker(presenter,
Expand Down
7 changes: 7 additions & 0 deletions src/dxvk/dxvk_latency.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,10 @@ namespace dxvk {
virtual void notifyCpuPresentEnd(
uint64_t frameId) = 0;

virtual void notifySubmit() { }
virtual void notifyPresent(
uint64_t frameId) { }

/**
* \brief Called when a command list is submitted to the GPU
*
Expand Down Expand Up @@ -174,6 +178,9 @@ namespace dxvk {
virtual void notifyGpuExecutionEnd(
uint64_t frameId) = 0;

virtual void notifyGpuPresentBegin(
uint64_t frameId) { }

/**
* \brief Called when presentation of a given frame finishes on the GPU
*
Expand Down
6 changes: 5 additions & 1 deletion src/dxvk/dxvk_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,16 @@ namespace dxvk {
useRawSsbo = config.getOption<Tristate>("dxvk.useRawSsbo", Tristate::Auto);
hud = config.getOption<std::string>("dxvk.hud", "");
tearFree = config.getOption<Tristate>("dxvk.tearFree", Tristate::Auto);
latencySleep = config.getOption<Tristate>("dxvk.latencySleep", Tristate::Auto);
latencySleep = config.getOption<Tristate>("dxvk.latencySleep", Tristate::False);
latencyTolerance = config.getOption<int32_t> ("dxvk.latencyTolerance", 1000);
disableNvLowLatency2 = config.getOption<Tristate>("dxvk.disableNvLowLatency2", Tristate::Auto);
hideIntegratedGraphics = config.getOption<bool> ("dxvk.hideIntegratedGraphics", false);
zeroMappedMemory = config.getOption<bool> ("dxvk.zeroMappedMemory", false);
allowFse = config.getOption<bool> ("dxvk.allowFse", false);
framePace = config.getOption<std::string>("dxvk.framePace", "");
lowLatencyOffset = config.getOption<int32_t> ("dxvk.lowLatencyOffset", 0);
lowLatencyAllowCpuFramesOverlap
= config.getOption<bool> ("dxvk.lowLatencyAllowCpuFramesOverlap", true);
deviceFilter = config.getOption<std::string>("dxvk.deviceFilter", "");
tilerMode = config.getOption<Tristate>("dxvk.tilerMode", Tristate::Auto);
}
Expand Down
16 changes: 15 additions & 1 deletion src/dxvk/dxvk_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ namespace dxvk {
Tristate tearFree = Tristate::Auto;

/// Enables latency sleep
Tristate latencySleep = Tristate::Auto;
/// Defaults to false in this build to activate the FramePacer,
/// especially for the case when the game doesn't support Reflex
Tristate latencySleep = Tristate::False;

/// Latency tolerance, in microseconds
int32_t latencyTolerance = 0u;
Expand All @@ -61,6 +63,18 @@ namespace dxvk {
/// Whether to enable tiler optimizations
Tristate tilerMode = Tristate::Auto;

/// Frame pacing mode, read from the dxvk.framePace option.
/// An empty string means that no mode was set explicitly.
std::string framePace;

/// A value in microseconds to fine-tune the low-latency frame pacing.
/// Positive values make a frame begin later which might improve responsiveness.
/// Negative values make a frame begin earlier which might improve fps.
/// Initialized to the same default as the dxvk.lowLatencyOffset option so
/// that a default-constructed options struct never holds an indeterminate value.
int32_t lowLatencyOffset = 0;

/// Determines whether a frame is allowed to begin before finishing processing
/// the CPU part of the previous one, when low-latency frame pacing is used.
/// Initialized to the same default as dxvk.lowLatencyAllowCpuFramesOverlap.
bool lowLatencyAllowCpuFramesOverlap = true;

// Device name
std::string deviceFilter;
};
Expand Down
70 changes: 27 additions & 43 deletions src/dxvk/dxvk_presenter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,18 +259,11 @@ namespace dxvk {
return;

if (m_device->features().khrPresentWait.presentWait) {
bool canSignal = false;

{ std::unique_lock lock(m_frameMutex);

m_lastSignaled = frameId;
canSignal = m_lastCompleted >= frameId;
}

if (canSignal)
m_signal->signal(frameId);
std::lock_guard lock(m_frameMutex);
m_lastSignaled = frameId;
m_frameCond.notify_one();
} else {
m_fpsLimiter.delay();
m_fpsLimiter.delay(tracker);
m_signal->signal(frameId);

if (tracker)
Expand Down Expand Up @@ -1210,26 +1203,25 @@ namespace dxvk {
void Presenter::runFrameThread() {
env::setThreadName("dxvk-frame");

while (true) {
PresenterFrame frame = { };
std::unique_lock lock(m_frameMutex);

while (true) {
// Wait for all GPU work for this frame to complete in order to maintain
// ordering guarantees of the frame signal w.r.t. objects being released
{ std::unique_lock lock(m_frameMutex);
m_frameCond.wait(lock, [this] {
return !m_frameQueue.empty() && m_frameQueue.front().frameId <= m_lastSignaled;
});

m_frameCond.wait(lock, [this] {
return !m_frameQueue.empty();
});
// Use a frame ID of 0 as an exit condition
PresenterFrame frame = m_frameQueue.front();

// Use a frame ID of 0 as an exit condition
frame = m_frameQueue.front();

if (!frame.frameId) {
m_frameQueue.pop();
return;
}
if (!frame.frameId) {
m_frameQueue.pop();
return;
}

lock.unlock();

// If the present operation has succeeded, actually wait for it to complete.
// Don't bother with it on MAILBOX / IMMEDIATE modes since doing so would
// restrict us to the display refresh rate on some platforms (XWayland).
Expand All @@ -1243,32 +1235,24 @@ namespace dxvk {

// Signal latency tracker right away to get more accurate
// measurements if the frame rate limiter is enabled.
if (frame.tracker) {
if (frame.tracker)
frame.tracker->notifyGpuPresentEnd(frame.frameId);
frame.tracker = nullptr;
}

// Apply FPS limiter here to align it as closely with scanout as we can,
// Apply FPS limiter here to align it as closely with scanout as we can,
// and delay signaling the frame latency event to emulate behaviour of a
// low refresh rate display as closely as we can.
m_fpsLimiter.delay();

// Wake up any thread that may be waiting for the queue to become empty
bool canSignal = false;

{ std::unique_lock lock(m_frameMutex);

m_frameQueue.pop();
m_frameDrain.notify_one();

m_lastCompleted = frame.frameId;
canSignal = m_lastSignaled >= frame.frameId;
}
m_fpsLimiter.delay(frame.tracker);
frame.tracker = nullptr;

// Always signal even on error, since failures here
// are transparent to the front-end.
if (canSignal)
m_signal->signal(frame.frameId);
m_signal->signal(frame.frameId);

// Wake up any thread that may be waiting for the queue to become empty
lock.lock();

m_frameQueue.pop();
m_frameDrain.notify_one();
}
}

Expand Down
1 change: 0 additions & 1 deletion src/dxvk/dxvk_presenter.h
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,6 @@ namespace dxvk {
std::queue<PresenterFrame> m_frameQueue;

uint64_t m_lastSignaled = 0u;
uint64_t m_lastCompleted = 0u;

alignas(CACHE_LINE_SIZE)
FpsLimiter m_fpsLimiter;
Expand Down
Loading