Expose maximum_frame_latency (#4899)

Co-authored-by: Emil Ernerfeldt <[email protected]>
gfx-rs · Jan 17, 2024 · b8f27c7 · b8f27c7
1 parent 2e38187
commit b8f27c7
Show file tree

Hide file tree

Showing 19 changed files with 83 additions and 70 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -98,6 +98,7 @@ By @wumpf in [#5044](https://github.com/gfx-rs/wgpu/pull/5044)
 - Added support for the float32-filterable feature. By @almarklein in [#4759](https://github.com/gfx-rs/wgpu/pull/4759)
 - GPU buffer memory is released during "lose the device". By @bradwerth in [#4851](https://github.com/gfx-rs/wgpu/pull/4851)
 - wgpu and wgpu-core features are now documented on docs.rs. By @wumpf in [#4886](https://github.com/gfx-rs/wgpu/pull/4886)
+- `SurfaceConfiguration` now exposes `desired_maximum_frame_latency`, which was previously hard-coded to 2. Setting it to 1 reduces latency at the risk of making GPU & CPU work sequential. Currently, on DX12 this affects the `MaximumFrameLatency`; on all other backends except OpenGL it affects the size of the swapchain (on OpenGL this has no effect). By @emilk & @wumpf in [#4899](https://github.com/gfx-rs/wgpu/pull/4899)
 - DeviceLostClosure is guaranteed to be invoked exactly once. By @bradwerth in [#4862](https://github.com/gfx-rs/wgpu/pull/4862)
 
 #### OpenGL

diff --git a/examples/src/framework.rs b/examples/src/framework.rs
@@ -571,6 +571,7 @@ impl<E: Example + wgpu::WasmNotSendSync> From<ExampleTestParams<E>>
                         format,
                         width: params.width,
                         height: params.height,
+                        desired_maximum_frame_latency: 2,
                         present_mode: wgpu::PresentMode::Fifo,
                         alpha_mode: wgpu::CompositeAlphaMode::Auto,
                         view_formats: vec![format],

diff --git a/examples/src/hello_triangle/mod.rs b/examples/src/hello_triangle/mod.rs
@@ -72,16 +72,9 @@ async fn run(event_loop: EventLoop<()>, window: Window) {
         multiview: None,
     });
 
-    let mut config = wgpu::SurfaceConfiguration {
-        usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
-        format: swapchain_format,
-        width: size.width,
-        height: size.height,
-        present_mode: wgpu::PresentMode::Fifo,
-        alpha_mode: swapchain_capabilities.alpha_modes[0],
-        view_formats: vec![],
-    };
-
+    let mut config = surface
+        .get_default_config(&adapter, size.width, size.height)
+        .unwrap();
     surface.configure(&device, &config);
 
     let window = &window;

diff --git a/examples/src/hello_windows/mod.rs b/examples/src/hello_windows/mod.rs
@@ -30,20 +30,11 @@ impl ViewportDesc {
 
     fn build(self, adapter: &wgpu::Adapter, device: &wgpu::Device) -> Viewport {
         let size = self.window.inner_size();
-
-        let caps = self.surface.get_capabilities(adapter);
-        let config = wgpu::SurfaceConfiguration {
-            usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
-            format: caps.formats[0],
-            width: size.width,
-            height: size.height,
-            present_mode: wgpu::PresentMode::Fifo,
-            alpha_mode: caps.alpha_modes[0],
-            view_formats: vec![],
-        };
-
+        let config = self
+            .surface
+            .get_default_config(adapter, size.width, size.height)
+            .unwrap();
         self.surface.configure(device, &config);
-
         Viewport { desc: self, config }
     }
 }

diff --git a/examples/src/uniform_values/mod.rs b/examples/src/uniform_values/mod.rs
@@ -192,15 +192,9 @@ impl WgpuContext {
             multiview: None,
         });
 
-        let surface_config = wgpu::SurfaceConfiguration {
-            usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
-            format: swapchain_format,
-            width: size.width,
-            height: size.height,
-            present_mode: wgpu::PresentMode::Fifo,
-            alpha_mode: swapchain_capabilities.alpha_modes[0],
-            view_formats: vec![],
-        };
+        let surface_config = surface
+            .get_default_config(&adapter, size.width, size.height)
+            .unwrap();
         surface.configure(&device, &surface_config);
 
         // (5)

diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
@@ -1982,10 +1982,12 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                     }
                 }
 
-                let num_frames = present::DESIRED_NUM_FRAMES
-                    .clamp(*caps.swap_chain_sizes.start(), *caps.swap_chain_sizes.end());
+                let maximum_frame_latency = config.desired_maximum_frame_latency.clamp(
+                    *caps.maximum_frame_latency.start(),
+                    *caps.maximum_frame_latency.end(),
+                );
                 let mut hal_config = hal::SurfaceConfiguration {
-                    swap_chain_size: num_frames,
+                    maximum_frame_latency,
                     present_mode: config.present_mode,
                     composite_alpha_mode: config.alpha_mode,
                     format: config.format,
@@ -2056,7 +2058,6 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                 *presentation = Some(present::Presentation {
                     device: super::any_device::AnyDevice::new(device.clone()),
                     config: config.clone(),
-                    num_frames,
                     acquired_texture: None,
                 });
             }

diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs
@@ -37,14 +37,11 @@ use thiserror::Error;
 use wgt::SurfaceStatus as Status;
 
 const FRAME_TIMEOUT_MS: u32 = 1000;
-pub const DESIRED_NUM_FRAMES: u32 = 3;
 
 #[derive(Debug)]
 pub(crate) struct Presentation {
     pub(crate) device: AnyDevice,
     pub(crate) config: wgt::SurfaceConfiguration<Vec<wgt::TextureFormat>>,
-    #[allow(unused)]
-    pub(crate) num_frames: u32,
     pub(crate) acquired_texture: Option<TextureId>,
 }
 

diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs
@@ -23,7 +23,7 @@ const BUNNY_SIZE: f32 = 0.15 * 256.0;
 const GRAVITY: f32 = -9.8 * 100.0;
 const MAX_VELOCITY: f32 = 750.0;
 const COMMAND_BUFFER_PER_CONTEXT: usize = 100;
-const DESIRED_FRAMES: u32 = 3;
+const DESIRED_MAX_LATENCY: u32 = 2;
 
 #[repr(C)]
 #[derive(Clone, Copy)]
@@ -132,9 +132,9 @@ impl<A: hal::Api> Example<A> {
 
         let window_size: (u32, u32) = window.inner_size().into();
         let surface_config = hal::SurfaceConfiguration {
-            swap_chain_size: DESIRED_FRAMES.clamp(
-                *surface_caps.swap_chain_sizes.start(),
-                *surface_caps.swap_chain_sizes.end(),
+            maximum_frame_latency: DESIRED_MAX_LATENCY.clamp(
+                *surface_caps.maximum_frame_latency.start(),
+                *surface_caps.maximum_frame_latency.end(),
             ),
             present_mode: wgt::PresentMode::Fifo,
             composite_alpha_mode: wgt::CompositeAlphaMode::Opaque,

diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs
@@ -14,7 +14,7 @@ use std::{
 use winit::window::WindowButtons;
 
 const COMMAND_BUFFER_PER_CONTEXT: usize = 100;
-const DESIRED_FRAMES: u32 = 3;
+const DESIRED_MAX_LATENCY: u32 = 2;
 
 /// [D3D12_RAYTRACING_INSTANCE_DESC](https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#d3d12_raytracing_instance_desc)
 /// [VkAccelerationStructureInstanceKHR](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkAccelerationStructureInstanceKHR.html)
@@ -264,9 +264,9 @@ impl<A: hal::Api> Example<A> {
             *surface_caps.formats.first().unwrap()
         };
         let surface_config = hal::SurfaceConfiguration {
-            swap_chain_size: DESIRED_FRAMES
-                .max(*surface_caps.swap_chain_sizes.start())
-                .min(*surface_caps.swap_chain_sizes.end()),
+            maximum_frame_latency: DESIRED_MAX_LATENCY
+                .max(*surface_caps.maximum_frame_latency.start())
+                .min(*surface_caps.maximum_frame_latency.end()),
             present_mode: wgt::PresentMode::Fifo,
             composite_alpha_mode: wgt::CompositeAlphaMode::Opaque,
             format: surface_format,

diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs
@@ -626,8 +626,8 @@ impl crate::Adapter<super::Api> for super::Adapter {
                 wgt::TextureFormat::Rgb10a2Unorm,
                 wgt::TextureFormat::Rgba16Float,
             ],
-            // we currently use a flip effect which supports 2..=16 buffers
-            swap_chain_sizes: 2..=16,
+            // See https://learn.microsoft.com/en-us/windows/win32/api/dxgi/nf-dxgi-idxgidevice1-setmaximumframelatency
+            maximum_frame_latency: 1..=16,
             current_extent,
             usage: crate::TextureUses::COLOR_TARGET
                 | crate::TextureUses::COPY_SRC

diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
@@ -660,13 +660,18 @@ impl crate::Surface<Api> for Surface {
 
         let non_srgb_format = auxil::dxgi::conv::map_texture_format_nosrgb(config.format);
 
+        // Nvidia recommends to use 1-2 more buffers than the maximum latency
+        // https://developer.nvidia.com/blog/advanced-api-performance-swap-chains/
+        // For high latency extra buffers seem excessive, so use at least 3 buffers, beyond that add 1, and cap at the 16 buffers a flip swap chain supports.
+        let swap_chain_buffer = (config.maximum_frame_latency + 1).clamp(3, 16);
+
         let swap_chain = match self.swap_chain.write().take() {
             //Note: this path doesn't properly re-initialize all of the things
             Some(sc) => {
                 let raw = unsafe { sc.release_resources() };
                 let result = unsafe {
                     raw.ResizeBuffers(
-                        config.swap_chain_size,
+                        swap_chain_buffer,
                         config.extent.width,
                         config.extent.height,
                         non_srgb_format,
@@ -693,7 +698,7 @@ impl crate::Surface<Api> for Surface {
                         quality: 0,
                     },
                     buffer_usage: dxgitype::DXGI_USAGE_RENDER_TARGET_OUTPUT,
-                    buffer_count: config.swap_chain_size,
+                    buffer_count: swap_chain_buffer,
                     scaling: d3d12::Scaling::Stretch,
                     swap_effect: d3d12::SwapEffect::FlipDiscard,
                     flags,
@@ -797,11 +802,11 @@ impl crate::Surface<Api> for Surface {
             | SurfaceTarget::SwapChainPanel(_) => {}
         }
 
-        unsafe { swap_chain.SetMaximumFrameLatency(config.swap_chain_size) };
+        unsafe { swap_chain.SetMaximumFrameLatency(config.maximum_frame_latency) };
         let waitable = unsafe { swap_chain.GetFrameLatencyWaitableObject() };
 
-        let mut resources = Vec::with_capacity(config.swap_chain_size as usize);
-        for i in 0..config.swap_chain_size {
+        let mut resources = Vec::with_capacity(swap_chain_buffer as usize);
+        for i in 0..swap_chain_buffer {
             let mut resource = d3d12::Resource::null();
             unsafe {
                 swap_chain.GetBuffer(i, &d3d12_ty::ID3D12Resource::uuidof(), resource.mut_void())

diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs
@@ -1138,7 +1138,7 @@ impl crate::Adapter<super::Api> for super::Adapter {
                     vec![wgt::PresentMode::Fifo] //TODO
                 },
                 composite_alpha_modes: vec![wgt::CompositeAlphaMode::Opaque], //TODO
-                swap_chain_sizes: 2..=2,
+                maximum_frame_latency: 2..=2, //TODO, unused currently
                 current_extent: None,
                 usage: crate::TextureUses::COLOR_TARGET,
             })

diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
@@ -922,11 +922,14 @@ pub struct SurfaceCapabilities {
     /// Must be at least one.
     pub formats: Vec<wgt::TextureFormat>,
 
-    /// Range for the swap chain sizes.
+    /// Range for the number of queued frames.
     ///
-    /// - `swap_chain_sizes.start` must be at least 1.
-    /// - `swap_chain_sizes.end` must be larger or equal to `swap_chain_sizes.start`.
-    pub swap_chain_sizes: RangeInclusive<u32>,
+    /// Depending on the backend, this either sets the swapchain frame count to value + 1, calls `SetMaximumFrameLatency` with the given value,
+    /// or uses a wait-for-present in the acquire method to limit rendering such that it acts like a swapchain with value + 1 frames.
+    ///
+    /// - `maximum_frame_latency.start` must be at least 1.
+    /// - `maximum_frame_latency.end` must be larger or equal to `maximum_frame_latency.start`.
+    pub maximum_frame_latency: RangeInclusive<u32>,
 
     /// Current extent of the surface, if known.
     pub current_extent: Option<wgt::Extent3d>,
@@ -1252,9 +1255,9 @@ pub struct RenderPipelineDescriptor<'a, A: Api> {
 
 #[derive(Debug, Clone)]
 pub struct SurfaceConfiguration {
-    /// Number of textures in the swap chain. Must be in
-    /// `SurfaceCapabilities::swap_chain_size` range.
-    pub swap_chain_size: u32,
+    /// Maximum number of queued frames. Must be in
+    /// `SurfaceCapabilities::maximum_frame_latency` range.
+    pub maximum_frame_latency: u32,
     /// Vertical synchronization mode.
     pub present_mode: wgt::PresentMode,
     /// Alpha composition mode.

diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs
@@ -320,13 +320,14 @@ impl crate::Adapter<super::Api> for super::Adapter {
         let pc = &self.shared.private_caps;
         Some(crate::SurfaceCapabilities {
             formats,
-            //Note: this is hardcoded in `CAMetalLayer` documentation
-            swap_chain_sizes: if pc.can_set_maximum_drawables_count {
-                2..=3
+            // We use this to govern the maximum number of drawables, which is set to this value + 1.
+            // See https://developer.apple.com/documentation/quartzcore/cametallayer/2938720-maximumdrawablecount
+            maximum_frame_latency: if pc.can_set_maximum_drawables_count {
+                1..=2
             } else {
-                // 3 is the default in `CAMetalLayer` documentation
+                // 3 is the default value for maximum drawables in `CAMetalLayer` documentation
                 // iOS 10.3 was tested to use 3 on iphone5s
-                3..=3
+                2..=2
             },
             present_modes: if pc.can_set_display_sync {
                 vec![wgt::PresentMode::Fifo, wgt::PresentMode::Immediate]

diff --git a/wgpu-hal/src/metal/surface.rs b/wgpu-hal/src/metal/surface.rs
@@ -221,7 +221,7 @@ impl crate::Surface<super::Api> for super::Surface {
         }
 
         // this gets ignored on iOS for certain OS/device combinations (iphone5s iOS 10.3)
-        render_layer.set_maximum_drawable_count(config.swap_chain_size as _);
+        render_layer.set_maximum_drawable_count(config.maximum_frame_latency as u64 + 1);
         render_layer.set_drawable_size(drawable_size);
         if caps.can_set_next_drawable_timeout {
             let () = msg_send![*render_layer, setAllowsNextDrawableTimeout:false];

diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
@@ -1848,7 +1848,11 @@ impl crate::Adapter<super::Api> for super::Adapter {
             .collect();
         Some(crate::SurfaceCapabilities {
             formats,
-            swap_chain_sizes: caps.min_image_count..=max_image_count,
+            // TODO: Right now we're always truncating the swap chain
+            // (presumably - we're actually setting the min image count which isn't necessarily the swap chain size)
+            // Instead, we should use extensions when available to wait in present.
+            // See https://github.com/gfx-rs/wgpu/issues/2869
+            maximum_frame_latency: (caps.min_image_count - 1)..=(max_image_count - 1), // Note this can't underflow since `min_image_count` is at least one and we already patched `max_image_count`.
             current_extent,
             usage: conv::map_vk_image_usage(caps.supported_usage_flags),
             present_modes: raw_present_modes

diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs
@@ -579,7 +579,7 @@ impl super::Device {
         let mut info = vk::SwapchainCreateInfoKHR::builder()
             .flags(raw_flags)
             .surface(surface.raw)
-            .min_image_count(config.swap_chain_size)
+            .min_image_count(config.maximum_frame_latency + 1) // TODO: https://github.com/gfx-rs/wgpu/issues/2869
             .image_format(original_format)
             .image_color_space(color_space)
             .image_extent(vk::Extent2D {

diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
@@ -5151,6 +5151,26 @@ pub struct SurfaceConfiguration<V> {
     /// AutoNoVsync will gracefully do a designed sets of fallbacks if their primary modes are
     /// unsupported.
     pub present_mode: PresentMode,
+    /// Desired maximum number of frames that the presentation engine should queue in advance.
+    ///
+    /// This is a hint to the backend implementation and will always be clamped to the supported range.
+    /// As a consequence, either the maximum frame latency is set directly on the swap chain,
+    /// or waits on present are scheduled to avoid exceeding the maximum frame latency if supported,
+    /// or the swap chain size is set to (max-latency + 1).
+    ///
+    /// Defaults to 2 when created via `wgpu::Surface::get_default_config`.
+    ///
+    /// Typical values range from 1 to 3, but higher values are possible:
+    /// * Choose 2 or higher for potentially smoother frame display, as it allows at least one frame
+    /// to be queued up. This typically avoids starving the GPU's work queue.
+    /// Higher values are useful for achieving a constant flow of frames to the display under varying load.
+    /// * Choose 1 for low latency from frame recording to frame display.
+    /// ⚠️ If the backend does not support waiting on present, this will cause the CPU to wait for the GPU
+    /// to finish all work related to the previous frame when calling `wgpu::Surface::get_current_texture`,
+    /// causing CPU-GPU serialization (i.e. when `wgpu::Surface::get_current_texture` returns, the GPU might be idle).
+    /// It is currently not possible to query this. See <https://github.com/gfx-rs/wgpu/issues/2869>.
+    /// * A value of 0 is generally not supported and always clamped to a higher value.
+    pub desired_maximum_frame_latency: u32,
     /// Specifies how the alpha channel of the textures should be handled during compositing.
     pub alpha_mode: CompositeAlphaMode,
     /// Specifies what view formats will be allowed when calling create_view() on texture returned by get_current_texture().
@@ -5170,6 +5190,7 @@ impl<V: Clone> SurfaceConfiguration<V> {
             width: self.width,
             height: self.height,
             present_mode: self.present_mode,
+            desired_maximum_frame_latency: self.desired_maximum_frame_latency,
             alpha_mode: self.alpha_mode,
             view_formats: fun(self.view_formats.clone()),
         }

diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs
@@ -4793,6 +4793,7 @@ impl Surface<'_> {
             format: *caps.formats.get(0)?,
             width,
             height,
+            desired_maximum_frame_latency: 2,
             present_mode: *caps.present_modes.get(0)?,
             alpha_mode: wgt::CompositeAlphaMode::Auto,
             view_formats: vec![],