Move subgroup size handling in line with spec #8609

ErichDonGubler · 2025-12-03T16:57:11Z

Q: Is it possible to use more than this on some machines? If so, do we have any follow-up work for permitting larger groups?

No - the max observed subgroup size in the wild is 128 on qualcomm chips (and WARP). Similarly WARP supports the same range (4 to 128) so you can test with the full range of subgroups in the wild.

-Original file line number
+Diff line change
@@ Expand Up / @@ -140,6 +140,23 @@ event happens. Our new log policy is as follows: @@
     By @cwfitzgerald in [#8579](https://github.com/gfx-rs/wgpu/pull/8579).
+    #### `subgroup_{min,max}_size` renamed and moved from `Limits` -> `AdapterInfo`
+    To bring our code in line with the WebGPU spec, we have moved information about subgroup size
+    from limits to adapter info. Limits was not the correct place for this anyway, and we had some
+    code special casing those limits.
+    Additionally we have renamed the fields to match the spec.
+    ```diff
+    - let min = limits.min_subgroup_size;
+    + let min = info.subgroup_min_size;
+    - let max = limits.max_subgroup_size;
+    + let max = info.subgroup_max_size;
+    ```
+    By @cwfitzgerald in [#8609](https://github.com/gfx-rs/wgpu/pull/8609).
     ### New Features
     - Added support for transient textures on Vulkan and Metal. By @opstic in [#8247](https://github.com/gfx-rs/wgpu/pull/8247)
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -80,13 +80,8 @@ impl GPUAdapter { @@
       fn info(&self, scope: &mut v8::HandleScope) -> v8::Global<v8::Object> {
         self.info.get(scope, |_| {
           let info = self.instance.adapter_get_info(self.id);
-          let limits = self.instance.adapter_limits(self.id);
-          GPUAdapterInfo {
-            info,
-            subgroup_min_size: limits.min_subgroup_size,
-            subgroup_max_size: limits.max_subgroup_size,
-          }
+          GPUAdapterInfo { info }
         })
       }
@@ Expand Down Expand Up / @@ -429,8 +424,6 @@ impl GPUSupportedFeatures { @@
     pub struct GPUAdapterInfo {
       pub info: wgpu_types::AdapterInfo,
-      pub subgroup_min_size: u32,
-      pub subgroup_max_size: u32,
     }
     impl GarbageCollected for GPUAdapterInfo {
@@ Expand Down Expand Up / @@ -473,12 +466,12 @@ impl GPUAdapterInfo { @@
       #[getter]
       fn subgroup_min_size(&self) -> u32 {
-        self.subgroup_min_size
+        self.info.subgroup_min_size
       }
       #[getter]
       fn subgroup_max_size(&self) -> u32 {
-        self.subgroup_max_size
+        self.info.subgroup_max_size
       }
       #[getter]
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -119,13 +119,8 @@ impl GPUDevice { @@
       ) -> v8::Global<v8::Object> {
         self.adapter_info.get(scope, |_| {
           let info = self.instance.adapter_get_info(self.adapter);
-          let limits = self.instance.adapter_limits(self.adapter);
-          GPUAdapterInfo {
-            info,
-            subgroup_min_size: limits.min_subgroup_size,
-            subgroup_max_size: limits.max_subgroup_size,
-          }
+          GPUAdapterInfo { info }
         })
       }
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -323,6 +323,8 @@ mod tests { @@
             driver: String::new(),
             driver_info: String::new(),
             backend: wgt::Backend::Vulkan,
+            subgroup_min_size: 32,
+            subgroup_max_size: 32,
             transient_saves_memory: true,
         };
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -116,6 +116,15 @@ impl super::Adapter { @@
             }
             .unwrap();
+            let mut features1 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS1::default();
+            let hr = unsafe {
+                device.CheckFeatureSupport(
+                    Direct3D12::D3D12_FEATURE_D3D12_OPTIONS1,
+                    <*mut _>::cast(&mut features1),
+                    size_of_val(&features1) as u32,
+                )
+            };
             let driver_version = unsafe { adapter.CheckInterfaceSupport(&Dxgi::IDXGIDevice::IID) }
                 .ok()
                 .map(|i| {
@@ Expand Down Expand Up / @@ -156,6 +165,8 @@ impl super::Adapter { @@
                     driver_version.0, driver_version.1, driver_version.2, driver_version.3
                 ),
                 driver_info: String::new(),
+                subgroup_min_size: features1.WaveLaneCountMin,
+                subgroup_max_size: features1.WaveLaneCountMax,
                 transient_saves_memory: false,
             };
@@ Expand Down Expand Up / @@ -483,15 +494,6 @@ impl super::Adapter { @@
             };
             features.set(wgt::Features::TEXTURE_FORMAT_P010, p010_format_supported);
-            let mut features1 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS1::default();
-            let hr = unsafe {
-                device.CheckFeatureSupport(
-                    Direct3D12::D3D12_FEATURE_D3D12_OPTIONS1,
-                    <*mut _>::cast(&mut features1),
-                    size_of_val(&features1) as u32,
-                )
-            };
             features.set(
                 wgt::Features::SHADER_INT64,
                 shader_model >= naga::back::hlsl::ShaderModel::V6_0
@@ Expand Down Expand Up / @@ -694,8 +696,6 @@ impl super::Adapter { @@
                             .min(crate::MAX_VERTEX_BUFFERS as u32),
                         max_vertex_attributes: Direct3D12::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT,
                         max_vertex_buffer_array_stride: Direct3D12::D3D12_SO_BUFFER_MAX_STRIDE_IN_BYTES,
-                        min_subgroup_size: 4, // Not using `features1.WaveLaneCountMin` as it is unreliable
-                        max_subgroup_size: 128,
                         // The immediates are part of the root signature which
                         // has a limit of 64 DWORDS (256 bytes), but other resources
                         // also share the root signature:
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Move subgroup size handling in line with spec #8609

Uh oh!

Diff view

Diff view

There are no files selected for viewing

ErichDonGubler Dec 3, 2025

Uh oh!

cwfitzgerald Dec 4, 2025

Uh oh!

Uh oh!

Uh oh!

Uh oh!

-Original file line number
+Diff line change
@@ Expand Up / @@ -190,6 +190,8 @@ impl super::Adapter { @@
                 device_pci_bus_id: String::new(),
                 driver_info: version,
                 backend: wgt::Backend::Gl,
+                subgroup_min_size: wgt::MINIMUM_SUBGROUP_MIN_SIZE,
+                subgroup_max_size: wgt::MAXIMUM_SUBGROUP_MAX_SIZE,
                 transient_saves_memory: false,
             }
         }
@@ Expand Down Expand Up / @@ -751,8 +753,6 @@ impl super::Adapter { @@
                 } else {
                     !0
                 },
-                min_subgroup_size: 0,
-                max_subgroup_size: 0,
                 max_immediate_size: super::MAX_IMMEDIATES as u32 * 4,
                 min_uniform_buffer_offset_alignment,
                 min_storage_buffer_offset_alignment,
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -1100,8 +1100,6 @@ impl super::PrivateCapabilities { @@
                     max_vertex_buffers: self.max_vertex_buffers,
                     max_vertex_attributes: 31,
                     max_vertex_buffer_array_stride: base.max_vertex_buffer_array_stride,
-                    min_subgroup_size: 4,
-                    max_subgroup_size: 64,
                     max_immediate_size: 0x1000,
                     min_uniform_buffer_offset_alignment: self.buffer_alignment as u32,
                     min_storage_buffer_offset_alignment: self.buffer_alignment as u32,
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -160,6 +160,12 @@ impl crate::Instance for Instance { @@
                             driver: String::new(),
                             driver_info: String::new(),
                             backend: wgt::Backend::Metal,
+                            // These are hardcoded based on typical values for Metal devices
+                            //
+                            // See <https://github.com/gpuweb/gpuweb/blob/main/proposals/subgroups.md#adapter-info>
+                            // for more information.
+                            subgroup_min_size: 4,
+                            subgroup_max_size: 64,
                             transient_saves_memory: shared.private_caps.supports_memoryless_storage,
                         },
                         features: shared.private_caps.features(),
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -141,6 +141,8 @@ pub fn adapter_info() -> wgt::AdapterInfo { @@
             driver: String::from("wgpu"),
             driver_info: String::new(),
             backend: wgt::Backend::Noop,
+            subgroup_min_size: wgt::MINIMUM_SUBGROUP_MIN_SIZE,
+            subgroup_max_size: wgt::MAXIMUM_SUBGROUP_MAX_SIZE,
             transient_saves_memory: false,
         }
     }
@@ Expand Down Expand Up / @@ -189,8 +191,6 @@ pub const CAPABILITIES: crate::Capabilities = { @@
                 max_compute_workgroup_size_y: ALLOC_MAX_U32,
                 max_compute_workgroup_size_z: ALLOC_MAX_U32,
                 max_compute_workgroups_per_dimension: ALLOC_MAX_U32,
-                min_subgroup_size: 1,
-                max_subgroup_size: ALLOC_MAX_U32,
                 max_immediate_size: ALLOC_MAX_U32,
                 max_non_sampler_bindings: ALLOC_MAX_U32,
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -1331,14 +1331,6 @@ impl PhysicalDeviceProperties { @@
                     .min(crate::MAX_VERTEX_BUFFERS as u32),
                 max_vertex_attributes: limits.max_vertex_input_attributes,
                 max_vertex_buffer_array_stride: limits.max_vertex_input_binding_stride,
-                min_subgroup_size: self
-                    .subgroup_size_control
-                    .map(|subgroup_size| subgroup_size.min_subgroup_size)
-                    .unwrap_or(0),
-                max_subgroup_size: self
-                    .subgroup_size_control
-                    .map(|subgroup_size| subgroup_size.max_subgroup_size)
-                    .unwrap_or(0),
                 max_immediate_size: limits.max_push_constants_size,
                 min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment as u32,
                 min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment as u32,
@@ Expand Down Expand Up / @@ -1776,6 +1768,14 @@ impl super::Instance { @@
                         .to_owned()
                 },
                 backend: wgt::Backend::Vulkan,
+                subgroup_min_size: phd_capabilities
+                    .subgroup_size_control
+                    .map(|subgroup_size| subgroup_size.min_subgroup_size)
+                    .unwrap_or(wgt::MINIMUM_SUBGROUP_MIN_SIZE),
+                subgroup_max_size: phd_capabilities
+                    .subgroup_size_control
+                    .map(|subgroup_size| subgroup_size.max_subgroup_size)
+                    .unwrap_or(wgt::MAXIMUM_SUBGROUP_MAX_SIZE),
                 transient_saves_memory: supports_lazily_allocated,
             };
             let (available_features, mut downlevel_flags) =
@@ Expand Down @@

-Original file line number
+Diff line change
@@ -1,6 +1,7 @@
     use std::io;
     use bitflags::Flags;
+    use wgpu::AdapterInfo;
     use crate::{
         report::{AdapterReport, GpuReport},
@@ Expand Down Expand Up @@
         // Adapter Info //
         //////////////////
+        let AdapterInfo {
+            name,
+            vendor,
+            device,
+            device_type,
+            device_pci_bus_id,
+            driver,
+            driver_info,
+            backend,
+            subgroup_min_size,
+            subgroup_max_size,
+            transient_saves_memory,
+        } = info;
         if matches!(verbosity, PrintingVerbosity::NameOnly) {
             writeln!(output, "Adapter {idx}: {} ({:?})", info.name, info.backend)?;
             return Ok(());
         }
         writeln!(output, "Adapter {idx}:")?;
-        writeln!(output, "\t         Backend: {:?}", info.backend)?;
-        writeln!(output, "\t            Name: {}", info.name)?;
-        writeln!(output, "\t        VendorID: {:#X?}", info.vendor)?;
-        writeln!(output, "\t        DeviceID: {:#X?}", info.device)?;
-        writeln!(output, "\t  DevicePCIBusId: {}", print_empty_string(&info.device_pci_bus_id))?;
-        writeln!(output, "\t            Type: {:?}", info.device_type)?;
-        writeln!(output, "\t          Driver: {}", print_empty_string(&info.driver))?;
-        writeln!(output, "\t      DriverInfo: {}", print_empty_string(&info.driver_info))?;
-        writeln!(output, "\tWebGPU Compliant: {:?}", downlevel.is_webgpu_compliant())?;
+        writeln!(output, "\t               Backend: {backend:?}")?;
+        writeln!(output, "\t                  Name: {name}")?;
+        writeln!(output, "\t             Vendor ID: {vendor:#X?}")?;
+        writeln!(output, "\t             Device ID: {device:#X?}")?;
+        writeln!(output, "\t     Device PCI Bus ID: {}", print_empty_string(device_pci_bus_id))?;
+        writeln!(output, "\t                  Type: {device_type:?}")?;
+        writeln!(output, "\t                Driver: {}", print_empty_string(driver))?;
+        writeln!(output, "\t           Driver Info: {}", print_empty_string(driver_info))?;
+        writeln!(output, "\t     Subgroup Min Size: {subgroup_min_size}")?;
+        writeln!(output, "\t     Subgroup Max Size: {subgroup_max_size}")?;
+        writeln!(output, "\tTransient Saves Memory: {transient_saves_memory}")?;
+        writeln!(output, "\t      WebGPU Compliant: {:?}", downlevel.is_webgpu_compliant())?;
         if matches!(verbosity, PrintingVerbosity::Information) {
             return Ok(());
@@ Expand Down Expand Up @@
             max_compute_workgroup_size_y,
             max_compute_workgroup_size_z,
             max_compute_workgroups_per_dimension,
-            min_subgroup_size,
-            max_subgroup_size,
             max_immediate_size,
             max_non_sampler_bindings,
@@ Expand Down Expand Up @@
         writeln!(output, "\t\t                                 Max Vertex Buffers: {max_vertex_buffers}")?;
         writeln!(output, "\t\t                              Max Vertex Attributes: {max_vertex_attributes}")?;
         writeln!(output, "\t\t                     Max Vertex Buffer Array Stride: {max_vertex_buffer_array_stride}")?;
-        writeln!(output, "\t\t                                  Min Subgroup Size: {min_subgroup_size}")?;
-        writeln!(output, "\t\t                                  Max Subgroup Size: {max_subgroup_size}")?;
         writeln!(output, "\t\t                             Max Immediate data Size: {max_immediate_size}")?;
         writeln!(output, "\t\t                Min Uniform Buffer Offset Alignment: {min_uniform_buffer_offset_alignment}")?;
         writeln!(output, "\t\t                Min Storage Buffer Offset Alignment: {min_storage_buffer_offset_alignment}")?;
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -132,6 +132,32 @@ pub struct AdapterInfo { @@
         pub driver_info: String,
         /// Backend used for device
         pub backend: Backend,
+        /// Minimum possible size of a subgroup on this adapter. Will
+        /// never be lower than [`crate::MINIMUM_SUBGROUP_MIN_SIZE`].
+        ///
+        /// This will vary from device to device. Typical values are listed below.
+        ///
+        /// - NVIDIA: 32
+        /// - AMD GCN/Vega: 64
+        /// - AMD RDNA+: 32
+        /// - Intel: 8 or 16
+        /// - Qualcomm: 64
+        /// - WARP: 4
+        /// - lavapipe: 8
+        pub subgroup_min_size: u32,
+        /// Maximum possible size of a subgroup on this adapter. Will
+        /// never be higher than [`crate::MAXIMUM_SUBGROUP_MAX_SIZE`].
+        ///
+        /// This will vary from device to device. Typical values are listed below:
+        ///
+        /// - NVIDIA: 32
+        /// - AMD GCN/Vega: 64
+        /// - AMD RDNA+: 64
+        /// - Intel: 16 or 32
+        /// - Qualcomm: 128
+        /// - WARP: 4 or 128
+        /// - lavapipe: 8
+        pub subgroup_max_size: u32,
         /// If true, adding [`TextureUsages::TRANSIENT`] to a texture will decrease memory usage.
         pub transient_saves_memory: bool,
     }
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -207,6 +207,17 @@ pub const QUERY_SET_MAX_QUERIES: u32 = 4096; @@
     #[doc = link_to_wgpu_docs!(["query"]: "struct.QuerySet.html")]
     pub const QUERY_SIZE: u32 = 8;
+    /// The minimum allowed value for [`AdapterInfo::subgroup_min_size`].
+    ///
+    /// See <https://gpuweb.github.io/gpuweb/#gpuadapterinfo>
+    /// where you can always use these values on all devices
+    pub const MINIMUM_SUBGROUP_MIN_SIZE: u32 = 4;
+    /// The maximum allowed value for [`AdapterInfo::subgroup_max_size`].
+    ///
+    /// See <https://gpuweb.github.io/gpuweb/#gpuadapterinfo>
+    /// where you can always use these values on all devices.
+    pub const MAXIMUM_SUBGROUP_MAX_SIZE: u32 = 128;
     /// Passed to `Device::poll` to control how and if it should block.
     #[derive(Clone, Debug)]
     pub enum PollType<T> {
@@ Expand Down @@

Move subgroup size handling in line with spec #8609

Uh oh!

Move subgroup size handling in line with spec #8609

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

ErichDonGubler Dec 3, 2025

Choose a reason for hiding this comment

Uh oh!

cwfitzgerald Dec 4, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!