Skip to content

Commit

Permalink
Expose maximum_frame_latency (#4899)
Browse files Browse the repository at this point in the history
Co-authored-by: Emil Ernerfeldt <[email protected]>
  • Loading branch information
Wumpf and emilk authored Jan 17, 2024
1 parent 2e38187 commit b8f27c7
Show file tree
Hide file tree
Showing 19 changed files with 83 additions and 70 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ By @wumpf in [#5044](https://github.com/gfx-rs/wgpu/pull/5044)
- Added support for the float32-filterable feature. By @almarklein in [#4759](https://github.com/gfx-rs/wgpu/pull/4759)
- GPU buffer memory is released during "lose the device". By @bradwerth in [#4851](https://github.com/gfx-rs/wgpu/pull/4851)
- wgpu and wgpu-core features are now documented on docs.rs. By @wumpf in [#4886](https://github.com/gfx-rs/wgpu/pull/4886)
- `SurfaceConfiguration` now exposes `desired_maximum_frame_latency` which was previously hard-coded to 2. By setting it to 1 you can reduce latency under the risk of making GPU & CPU work sequential. Currently, on DX12 this affects the `MaximumFrameLatency`, on all other backends except OpenGL the size of the swapchain (on OpenGL this has no effect). By @emilk & @wumpf in [#4899](https://github.com/gfx-rs/wgpu/pull/4899)
- DeviceLostClosure is guaranteed to be invoked exactly once. By @bradwerth in [#4862](https://github.com/gfx-rs/wgpu/pull/4862)

#### OpenGL
Expand Down
1 change: 1 addition & 0 deletions examples/src/framework.rs
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,7 @@ impl<E: Example + wgpu::WasmNotSendSync> From<ExampleTestParams<E>>
format,
width: params.width,
height: params.height,
desired_maximum_frame_latency: 2,
present_mode: wgpu::PresentMode::Fifo,
alpha_mode: wgpu::CompositeAlphaMode::Auto,
view_formats: vec![format],
Expand Down
13 changes: 3 additions & 10 deletions examples/src/hello_triangle/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,9 @@ async fn run(event_loop: EventLoop<()>, window: Window) {
multiview: None,
});

let mut config = wgpu::SurfaceConfiguration {
usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
format: swapchain_format,
width: size.width,
height: size.height,
present_mode: wgpu::PresentMode::Fifo,
alpha_mode: swapchain_capabilities.alpha_modes[0],
view_formats: vec![],
};

let mut config = surface
.get_default_config(&adapter, size.width, size.height)
.unwrap();
surface.configure(&device, &config);

let window = &window;
Expand Down
17 changes: 4 additions & 13 deletions examples/src/hello_windows/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,11 @@ impl ViewportDesc {

fn build(self, adapter: &wgpu::Adapter, device: &wgpu::Device) -> Viewport {
let size = self.window.inner_size();

let caps = self.surface.get_capabilities(adapter);
let config = wgpu::SurfaceConfiguration {
usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
format: caps.formats[0],
width: size.width,
height: size.height,
present_mode: wgpu::PresentMode::Fifo,
alpha_mode: caps.alpha_modes[0],
view_formats: vec![],
};

let config = self
.surface
.get_default_config(adapter, size.width, size.height)
.unwrap();
self.surface.configure(device, &config);

Viewport { desc: self, config }
}
}
Expand Down
12 changes: 3 additions & 9 deletions examples/src/uniform_values/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,15 +192,9 @@ impl WgpuContext {
multiview: None,
});

let surface_config = wgpu::SurfaceConfiguration {
usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
format: swapchain_format,
width: size.width,
height: size.height,
present_mode: wgpu::PresentMode::Fifo,
alpha_mode: swapchain_capabilities.alpha_modes[0],
view_formats: vec![],
};
let surface_config = surface
.get_default_config(&adapter, size.width, size.height)
.unwrap();
surface.configure(&device, &surface_config);

// (5)
Expand Down
9 changes: 5 additions & 4 deletions wgpu-core/src/device/global.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1982,10 +1982,12 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
}
}

let num_frames = present::DESIRED_NUM_FRAMES
.clamp(*caps.swap_chain_sizes.start(), *caps.swap_chain_sizes.end());
let maximum_frame_latency = config.desired_maximum_frame_latency.clamp(
*caps.maximum_frame_latency.start(),
*caps.maximum_frame_latency.end(),
);
let mut hal_config = hal::SurfaceConfiguration {
swap_chain_size: num_frames,
maximum_frame_latency,
present_mode: config.present_mode,
composite_alpha_mode: config.alpha_mode,
format: config.format,
Expand Down Expand Up @@ -2056,7 +2058,6 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
*presentation = Some(present::Presentation {
device: super::any_device::AnyDevice::new(device.clone()),
config: config.clone(),
num_frames,
acquired_texture: None,
});
}
Expand Down
3 changes: 0 additions & 3 deletions wgpu-core/src/present.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,11 @@ use thiserror::Error;
use wgt::SurfaceStatus as Status;

const FRAME_TIMEOUT_MS: u32 = 1000;
pub const DESIRED_NUM_FRAMES: u32 = 3;

#[derive(Debug)]
pub(crate) struct Presentation {
pub(crate) device: AnyDevice,
pub(crate) config: wgt::SurfaceConfiguration<Vec<wgt::TextureFormat>>,
#[allow(unused)]
pub(crate) num_frames: u32,
pub(crate) acquired_texture: Option<TextureId>,
}

Expand Down
8 changes: 4 additions & 4 deletions wgpu-hal/examples/halmark/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ const BUNNY_SIZE: f32 = 0.15 * 256.0;
const GRAVITY: f32 = -9.8 * 100.0;
const MAX_VELOCITY: f32 = 750.0;
const COMMAND_BUFFER_PER_CONTEXT: usize = 100;
const DESIRED_FRAMES: u32 = 3;
const DESIRED_MAX_LATENCY: u32 = 2;

#[repr(C)]
#[derive(Clone, Copy)]
Expand Down Expand Up @@ -132,9 +132,9 @@ impl<A: hal::Api> Example<A> {

let window_size: (u32, u32) = window.inner_size().into();
let surface_config = hal::SurfaceConfiguration {
swap_chain_size: DESIRED_FRAMES.clamp(
*surface_caps.swap_chain_sizes.start(),
*surface_caps.swap_chain_sizes.end(),
maximum_frame_latency: DESIRED_MAX_LATENCY.clamp(
*surface_caps.maximum_frame_latency.start(),
*surface_caps.maximum_frame_latency.end(),
),
present_mode: wgt::PresentMode::Fifo,
composite_alpha_mode: wgt::CompositeAlphaMode::Opaque,
Expand Down
8 changes: 4 additions & 4 deletions wgpu-hal/examples/ray-traced-triangle/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use std::{
use winit::window::WindowButtons;

const COMMAND_BUFFER_PER_CONTEXT: usize = 100;
const DESIRED_FRAMES: u32 = 3;
const DESIRED_MAX_LATENCY: u32 = 2;

/// [D3D12_RAYTRACING_INSTANCE_DESC](https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#d3d12_raytracing_instance_desc)
/// [VkAccelerationStructureInstanceKHR](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkAccelerationStructureInstanceKHR.html)
Expand Down Expand Up @@ -264,9 +264,9 @@ impl<A: hal::Api> Example<A> {
*surface_caps.formats.first().unwrap()
};
let surface_config = hal::SurfaceConfiguration {
swap_chain_size: DESIRED_FRAMES
.max(*surface_caps.swap_chain_sizes.start())
.min(*surface_caps.swap_chain_sizes.end()),
maximum_frame_latency: DESIRED_MAX_LATENCY
.max(*surface_caps.maximum_frame_latency.start())
.min(*surface_caps.maximum_frame_latency.end()),
present_mode: wgt::PresentMode::Fifo,
composite_alpha_mode: wgt::CompositeAlphaMode::Opaque,
format: surface_format,
Expand Down
4 changes: 2 additions & 2 deletions wgpu-hal/src/dx12/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -626,8 +626,8 @@ impl crate::Adapter<super::Api> for super::Adapter {
wgt::TextureFormat::Rgb10a2Unorm,
wgt::TextureFormat::Rgba16Float,
],
// we currently use a flip effect which supports 2..=16 buffers
swap_chain_sizes: 2..=16,
// See https://learn.microsoft.com/en-us/windows/win32/api/dxgi/nf-dxgi-idxgidevice1-setmaximumframelatency
maximum_frame_latency: 1..=16,
current_extent,
usage: crate::TextureUses::COLOR_TARGET
| crate::TextureUses::COPY_SRC
Expand Down
15 changes: 10 additions & 5 deletions wgpu-hal/src/dx12/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -660,13 +660,18 @@ impl crate::Surface<Api> for Surface {

let non_srgb_format = auxil::dxgi::conv::map_texture_format_nosrgb(config.format);

// Nvidia recommends to use 1-2 more buffers than the maximum latency
// https://developer.nvidia.com/blog/advanced-api-performance-swap-chains/
// For high latency extra buffers seems excessive, so go with a minimum of 3 and beyond that add 1.
let swap_chain_buffer = (config.maximum_frame_latency + 1).min(3);

let swap_chain = match self.swap_chain.write().take() {
//Note: this path doesn't properly re-initialize all of the things
Some(sc) => {
let raw = unsafe { sc.release_resources() };
let result = unsafe {
raw.ResizeBuffers(
config.swap_chain_size,
swap_chain_buffer,
config.extent.width,
config.extent.height,
non_srgb_format,
Expand All @@ -693,7 +698,7 @@ impl crate::Surface<Api> for Surface {
quality: 0,
},
buffer_usage: dxgitype::DXGI_USAGE_RENDER_TARGET_OUTPUT,
buffer_count: config.swap_chain_size,
buffer_count: swap_chain_buffer,
scaling: d3d12::Scaling::Stretch,
swap_effect: d3d12::SwapEffect::FlipDiscard,
flags,
Expand Down Expand Up @@ -797,11 +802,11 @@ impl crate::Surface<Api> for Surface {
| SurfaceTarget::SwapChainPanel(_) => {}
}

unsafe { swap_chain.SetMaximumFrameLatency(config.swap_chain_size) };
unsafe { swap_chain.SetMaximumFrameLatency(config.maximum_frame_latency) };
let waitable = unsafe { swap_chain.GetFrameLatencyWaitableObject() };

let mut resources = Vec::with_capacity(config.swap_chain_size as usize);
for i in 0..config.swap_chain_size {
let mut resources = Vec::with_capacity(config.maximum_frame_latency as usize);
for i in 0..config.maximum_frame_latency {
let mut resource = d3d12::Resource::null();
unsafe {
swap_chain.GetBuffer(i, &d3d12_ty::ID3D12Resource::uuidof(), resource.mut_void())
Expand Down
2 changes: 1 addition & 1 deletion wgpu-hal/src/gles/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1138,7 +1138,7 @@ impl crate::Adapter<super::Api> for super::Adapter {
vec![wgt::PresentMode::Fifo] //TODO
},
composite_alpha_modes: vec![wgt::CompositeAlphaMode::Opaque], //TODO
swap_chain_sizes: 2..=2,
maximum_frame_latency: 2..=2, //TODO, unused currently
current_extent: None,
usage: crate::TextureUses::COLOR_TARGET,
})
Expand Down
17 changes: 10 additions & 7 deletions wgpu-hal/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -922,11 +922,14 @@ pub struct SurfaceCapabilities {
/// Must be at least one.
pub formats: Vec<wgt::TextureFormat>,

/// Range for the swap chain sizes.
/// Range for the number of queued frames.
///
/// - `swap_chain_sizes.start` must be at least 1.
/// - `swap_chain_sizes.end` must be larger or equal to `swap_chain_sizes.start`.
pub swap_chain_sizes: RangeInclusive<u32>,
/// This adjusts either the swapchain frame count to value + 1 - or sets SetMaximumFrameLatency to the value given,
/// or uses a wait-for-present in the acquire method to limit rendering such that it acts like it's a value + 1 swapchain frame set.
///
/// - `maximum_frame_latency.start` must be at least 1.
/// - `maximum_frame_latency.end` must be larger or equal to `maximum_frame_latency.start`.
pub maximum_frame_latency: RangeInclusive<u32>,

/// Current extent of the surface, if known.
pub current_extent: Option<wgt::Extent3d>,
Expand Down Expand Up @@ -1252,9 +1255,9 @@ pub struct RenderPipelineDescriptor<'a, A: Api> {

#[derive(Debug, Clone)]
pub struct SurfaceConfiguration {
/// Number of textures in the swap chain. Must be in
/// `SurfaceCapabilities::swap_chain_size` range.
pub swap_chain_size: u32,
/// Maximum number of queued frames. Must be in
/// `SurfaceCapabilities::maximum_frame_latency` range.
pub maximum_frame_latency: u32,
/// Vertical synchronization mode.
pub present_mode: wgt::PresentMode,
/// Alpha composition mode.
Expand Down
11 changes: 6 additions & 5 deletions wgpu-hal/src/metal/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -320,13 +320,14 @@ impl crate::Adapter<super::Api> for super::Adapter {
let pc = &self.shared.private_caps;
Some(crate::SurfaceCapabilities {
formats,
//Note: this is hardcoded in `CAMetalLayer` documentation
swap_chain_sizes: if pc.can_set_maximum_drawables_count {
2..=3
// We use this here to govern the maximum number of drawables + 1.
// See https://developer.apple.com/documentation/quartzcore/cametallayer/2938720-maximumdrawablecount
maximum_frame_latency: if pc.can_set_maximum_drawables_count {
1..=2
} else {
// 3 is the default in `CAMetalLayer` documentation
// 3 is the default value for maximum drawables in `CAMetalLayer` documentation
// iOS 10.3 was tested to use 3 on iphone5s
3..=3
2..=2
},
present_modes: if pc.can_set_display_sync {
vec![wgt::PresentMode::Fifo, wgt::PresentMode::Immediate]
Expand Down
2 changes: 1 addition & 1 deletion wgpu-hal/src/metal/surface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ impl crate::Surface<super::Api> for super::Surface {
}

// this gets ignored on iOS for certain OS/device combinations (iphone5s iOS 10.3)
render_layer.set_maximum_drawable_count(config.swap_chain_size as _);
render_layer.set_maximum_drawable_count(config.maximum_frame_latency as u64 + 1);
render_layer.set_drawable_size(drawable_size);
if caps.can_set_next_drawable_timeout {
let () = msg_send![*render_layer, setAllowsNextDrawableTimeout:false];
Expand Down
6 changes: 5 additions & 1 deletion wgpu-hal/src/vulkan/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1848,7 +1848,11 @@ impl crate::Adapter<super::Api> for super::Adapter {
.collect();
Some(crate::SurfaceCapabilities {
formats,
swap_chain_sizes: caps.min_image_count..=max_image_count,
// TODO: Right now we're always trunkating the swap chain
// (presumably - we're actually setting the min image count which isn't necessarily the swap chain size)
// Instead, we should use extensions when available to wait in present.
// See https://github.com/gfx-rs/wgpu/issues/2869
maximum_frame_latency: (caps.min_image_count - 1)..=(max_image_count - 1), // Note this can't underflow since both `min_image_count` is at least one and we already patched `max_image_count`.
current_extent,
usage: conv::map_vk_image_usage(caps.supported_usage_flags),
present_modes: raw_present_modes
Expand Down
2 changes: 1 addition & 1 deletion wgpu-hal/src/vulkan/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,7 @@ impl super::Device {
let mut info = vk::SwapchainCreateInfoKHR::builder()
.flags(raw_flags)
.surface(surface.raw)
.min_image_count(config.swap_chain_size)
.min_image_count(config.maximum_frame_latency + 1) // TODO: https://github.com/gfx-rs/wgpu/issues/2869
.image_format(original_format)
.image_color_space(color_space)
.image_extent(vk::Extent2D {
Expand Down
21 changes: 21 additions & 0 deletions wgpu-types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5151,6 +5151,26 @@ pub struct SurfaceConfiguration<V> {
/// AutoNoVsync will gracefully do a designed sets of fallbacks if their primary modes are
/// unsupported.
pub present_mode: PresentMode,
/// Desired maximum number of frames that the presentation engine should queue in advance.
///
/// This is a hint to the backend implementation and will always be clamped to the supported range.
/// As a consequence, either the maximum frame latency is set directly on the swap chain,
/// or waits on present are scheduled to avoid exceeding the maximum frame latency if supported,
/// or the swap chain size is set to (max-latency + 1).
///
/// Defaults to 2 when created via `wgpu::Surface::get_default_config`.
///
/// Typical values range from 3 to 1, but higher values are possible:
/// * Choose 2 or higher for potentially smoother frame display, as it allows to be at least one frame
/// to be queued up. This typically avoids starving the GPU's work queue.
/// Higher values are useful for achieving a constant flow of frames to the display under varying load.
/// * Choose 1 for low latency from frame recording to frame display.
/// ⚠️ If the backend does not support waiting on present, this will cause the CPU to wait for the GPU
/// to finish all work related to the previous frame when calling `wgpu::Surface::get_current_texture`,
/// causing CPU-GPU serialization (i.e. when `wgpu::Surface::get_current_texture` returns, the GPU might be idle).
/// It is currently not possible to query this. See <https://github.com/gfx-rs/wgpu/issues/2869>.
/// * A value of 0 is generally not supported and always clamped to a higher value.
pub desired_maximum_frame_latency: u32,
/// Specifies how the alpha channel of the textures should be handled during compositing.
pub alpha_mode: CompositeAlphaMode,
/// Specifies what view formats will be allowed when calling create_view() on texture returned by get_current_texture().
Expand All @@ -5170,6 +5190,7 @@ impl<V: Clone> SurfaceConfiguration<V> {
width: self.width,
height: self.height,
present_mode: self.present_mode,
desired_maximum_frame_latency: self.desired_maximum_frame_latency,
alpha_mode: self.alpha_mode,
view_formats: fun(self.view_formats.clone()),
}
Expand Down
1 change: 1 addition & 0 deletions wgpu/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4793,6 +4793,7 @@ impl Surface<'_> {
format: *caps.formats.get(0)?,
width,
height,
desired_maximum_frame_latency: 2,
present_mode: *caps.present_modes.get(0)?,
alpha_mode: wgt::CompositeAlphaMode::Auto,
view_formats: vec![],
Expand Down

0 comments on commit b8f27c7

Please sign in to comment.