diff --git a/CHANGELOG.md b/CHANGELOG.md index 58b8b0c6a4e..b227ccd955d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -140,6 +140,23 @@ event happens. Our new log policy is as follows: By @cwfitzgerald in [#8579](https://github.com/gfx-rs/wgpu/pull/8579). +#### `subgroup_{min,max}_size` renamed and moved from `Limits` -> `AdapterInfo` + +To bring our code in line with the WebGPU spec, we have moved information about subgroup size +from limits to adapter info. Limits was not the correct place for this anyway, and we had some +code special casing those limits. + +Additionally we have renamed the fields to match the spec. + +```diff +- let min = limits.min_subgroup_size; ++ let min = info.subgroup_min_size; +- let max = limits.max_subgroup_size; ++ let max = info.subgroup_max_size; +``` + +By @cwfitzgerald in [#8609](https://github.com/gfx-rs/wgpu/pull/8609). + ### New Features - Added support for transient textures on Vulkan and Metal. By @opstic in [#8247](https://github.com/gfx-rs/wgpu/pull/8247) diff --git a/deno_webgpu/adapter.rs b/deno_webgpu/adapter.rs index 711a7190c7b..42d24d88cab 100644 --- a/deno_webgpu/adapter.rs +++ b/deno_webgpu/adapter.rs @@ -80,13 +80,8 @@ impl GPUAdapter { fn info(&self, scope: &mut v8::HandleScope) -> v8::Global { self.info.get(scope, |_| { let info = self.instance.adapter_get_info(self.id); - let limits = self.instance.adapter_limits(self.id); - GPUAdapterInfo { - info, - subgroup_min_size: limits.min_subgroup_size, - subgroup_max_size: limits.max_subgroup_size, - } + GPUAdapterInfo { info } }) } @@ -429,8 +424,6 @@ impl GPUSupportedFeatures { pub struct GPUAdapterInfo { pub info: wgpu_types::AdapterInfo, - pub subgroup_min_size: u32, - pub subgroup_max_size: u32, } impl GarbageCollected for GPUAdapterInfo { @@ -473,12 +466,12 @@ impl GPUAdapterInfo { #[getter] fn subgroup_min_size(&self) -> u32 { - self.subgroup_min_size + self.info.subgroup_min_size } #[getter] fn subgroup_max_size(&self) -> u32 { - self.subgroup_max_size + self.info.subgroup_max_size } #[getter] diff --git a/deno_webgpu/device.rs b/deno_webgpu/device.rs index 97110e13793..86893dff689 100644 --- a/deno_webgpu/device.rs +++ b/deno_webgpu/device.rs @@ -119,13 +119,8 @@ impl GPUDevice { ) -> v8::Global { self.adapter_info.get(scope, |_| { let info = self.instance.adapter_get_info(self.adapter); - let limits = self.instance.adapter_limits(self.adapter); - GPUAdapterInfo { - info, - subgroup_min_size: limits.min_subgroup_size, - subgroup_max_size: limits.max_subgroup_size, - } + GPUAdapterInfo { info } }) } diff --git a/wgpu-core/src/pipeline_cache.rs b/wgpu-core/src/pipeline_cache.rs index 2297e740538..a0c36157687 100644 --- a/wgpu-core/src/pipeline_cache.rs +++ b/wgpu-core/src/pipeline_cache.rs @@ -323,6 +323,8 @@ mod tests { driver: String::new(), driver_info: String::new(), backend: wgt::Backend::Vulkan, + subgroup_min_size: 32, + subgroup_max_size: 32, transient_saves_memory: true, }; diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index acd8031af96..5f36ecea750 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -116,6 +116,15 @@ impl super::Adapter { } .unwrap(); + let mut features1 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS1::default(); + let hr = unsafe { + device.CheckFeatureSupport( + Direct3D12::D3D12_FEATURE_D3D12_OPTIONS1, + <*mut _>::cast(&mut features1), + size_of_val(&features1) as u32, + ) + }; + let driver_version = unsafe { adapter.CheckInterfaceSupport(&Dxgi::IDXGIDevice::IID) } .ok() .map(|i| { @@ -156,6 +165,8 @@ impl super::Adapter { driver_version.0, driver_version.1, driver_version.2, driver_version.3 ), driver_info: String::new(), + subgroup_min_size: features1.WaveLaneCountMin, + subgroup_max_size: features1.WaveLaneCountMax, transient_saves_memory: false, }; @@ -483,15 +494,6 @@ impl super::Adapter { }; features.set(wgt::Features::TEXTURE_FORMAT_P010, p010_format_supported); - let mut features1 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS1::default(); - let hr = unsafe { - device.CheckFeatureSupport( - Direct3D12::D3D12_FEATURE_D3D12_OPTIONS1, - <*mut _>::cast(&mut features1), - size_of_val(&features1) as u32, - ) - }; - features.set( wgt::Features::SHADER_INT64, shader_model >= naga::back::hlsl::ShaderModel::V6_0 @@ -694,8 +696,6 @@ impl super::Adapter { .min(crate::MAX_VERTEX_BUFFERS as u32), max_vertex_attributes: Direct3D12::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT, max_vertex_buffer_array_stride: Direct3D12::D3D12_SO_BUFFER_MAX_STRIDE_IN_BYTES, - min_subgroup_size: 4, // Not using `features1.WaveLaneCountMin` as it is unreliable - max_subgroup_size: 128, // The immediates are part of the root signature which // has a limit of 64 DWORDS (256 bytes), but other resources // also share the root signature: diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index 83fd9e2b825..c03e237b9a0 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -190,6 +190,8 @@ impl super::Adapter { device_pci_bus_id: String::new(), driver_info: version, backend: wgt::Backend::Gl, + subgroup_min_size: wgt::MINIMUM_SUBGROUP_MIN_SIZE, + subgroup_max_size: wgt::MAXIMUM_SUBGROUP_MAX_SIZE, transient_saves_memory: false, } } @@ -751,8 +753,6 @@ impl super::Adapter { } else { !0 }, - min_subgroup_size: 0, - max_subgroup_size: 0, max_immediate_size: super::MAX_IMMEDIATES as u32 * 4, min_uniform_buffer_offset_alignment, min_storage_buffer_offset_alignment, diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 6158df52f53..cdd0ea07288 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -1100,8 +1100,6 @@ impl super::PrivateCapabilities { max_vertex_buffers: self.max_vertex_buffers, max_vertex_attributes: 31, max_vertex_buffer_array_stride: base.max_vertex_buffer_array_stride, - min_subgroup_size: 4, - max_subgroup_size: 64, max_immediate_size: 0x1000, min_uniform_buffer_offset_alignment: self.buffer_alignment as u32, min_storage_buffer_offset_alignment: self.buffer_alignment as u32, diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index c3f879627c3..0d404c146cf 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -160,6 +160,12 @@ impl crate::Instance for Instance { driver: String::new(), driver_info: String::new(), backend: wgt::Backend::Metal, + // These are hardcoded based on typical values for Metal devices + // + // See + // for more information. + subgroup_min_size: 4, + subgroup_max_size: 64, transient_saves_memory: shared.private_caps.supports_memoryless_storage, }, features: shared.private_caps.features(), diff --git a/wgpu-hal/src/noop/mod.rs b/wgpu-hal/src/noop/mod.rs index 2dd4a17597f..c0275b4d168 100644 --- a/wgpu-hal/src/noop/mod.rs +++ b/wgpu-hal/src/noop/mod.rs @@ -141,6 +141,8 @@ pub fn adapter_info() -> wgt::AdapterInfo { driver: String::from("wgpu"), driver_info: String::new(), backend: wgt::Backend::Noop, + subgroup_min_size: wgt::MINIMUM_SUBGROUP_MIN_SIZE, + subgroup_max_size: wgt::MAXIMUM_SUBGROUP_MAX_SIZE, transient_saves_memory: false, } } @@ -189,8 +191,6 @@ pub const CAPABILITIES: crate::Capabilities = { max_compute_workgroup_size_y: ALLOC_MAX_U32, max_compute_workgroup_size_z: ALLOC_MAX_U32, max_compute_workgroups_per_dimension: ALLOC_MAX_U32, - min_subgroup_size: 1, - max_subgroup_size: ALLOC_MAX_U32, max_immediate_size: ALLOC_MAX_U32, max_non_sampler_bindings: ALLOC_MAX_U32, diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index a511db0b911..d1deba019d7 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -1331,14 +1331,6 @@ impl PhysicalDeviceProperties { .min(crate::MAX_VERTEX_BUFFERS as u32), max_vertex_attributes: limits.max_vertex_input_attributes, max_vertex_buffer_array_stride: limits.max_vertex_input_binding_stride, - min_subgroup_size: self - .subgroup_size_control - .map(|subgroup_size| subgroup_size.min_subgroup_size) - .unwrap_or(0), - max_subgroup_size: self - .subgroup_size_control - .map(|subgroup_size| subgroup_size.max_subgroup_size) - .unwrap_or(0), max_immediate_size: limits.max_push_constants_size, min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment as u32, min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment as u32, @@ -1776,6 +1768,14 @@ impl super::Instance { .to_owned() }, backend: wgt::Backend::Vulkan, + subgroup_min_size: phd_capabilities + .subgroup_size_control + .map(|subgroup_size| subgroup_size.min_subgroup_size) + .unwrap_or(wgt::MINIMUM_SUBGROUP_MIN_SIZE), + subgroup_max_size: phd_capabilities + .subgroup_size_control + .map(|subgroup_size| subgroup_size.max_subgroup_size) + .unwrap_or(wgt::MAXIMUM_SUBGROUP_MAX_SIZE), transient_saves_memory: supports_lazily_allocated, }; let (available_features, mut downlevel_flags) = diff --git a/wgpu-info/src/human.rs b/wgpu-info/src/human.rs index 08f2c0b3b5a..a54495c281d 100644 --- a/wgpu-info/src/human.rs +++ b/wgpu-info/src/human.rs @@ -1,6 +1,7 @@ use std::io; use bitflags::Flags; +use wgpu::AdapterInfo; use crate::{ report::{AdapterReport, GpuReport}, @@ -89,21 +90,38 @@ fn print_adapter(output: &mut impl io::Write, report: &AdapterReport, idx: usize // Adapter Info // ////////////////// + let AdapterInfo { + name, + vendor, + device, + device_type, + device_pci_bus_id, + driver, + driver_info, + backend, + subgroup_min_size, + subgroup_max_size, + transient_saves_memory, + } = info; + if matches!(verbosity, PrintingVerbosity::NameOnly) { writeln!(output, "Adapter {idx}: {} ({:?})", info.name, info.backend)?; return Ok(()); } writeln!(output, "Adapter {idx}:")?; - writeln!(output, "\t Backend: {:?}", info.backend)?; - writeln!(output, "\t Name: {}", info.name)?; - writeln!(output, "\t VendorID: {:#X?}", info.vendor)?; - writeln!(output, "\t DeviceID: {:#X?}", info.device)?; - writeln!(output, "\t DevicePCIBusId: {}", print_empty_string(&info.device_pci_bus_id))?; - writeln!(output, "\t Type: {:?}", info.device_type)?; - writeln!(output, "\t Driver: {}", print_empty_string(&info.driver))?; - writeln!(output, "\t DriverInfo: {}", print_empty_string(&info.driver_info))?; - writeln!(output, "\tWebGPU Compliant: {:?}", downlevel.is_webgpu_compliant())?; + writeln!(output, "\t Backend: {backend:?}")?; + writeln!(output, "\t Name: {name}")?; + writeln!(output, "\t Vendor ID: {vendor:#X?}")?; + writeln!(output, "\t Device ID: {device:#X?}")?; + writeln!(output, "\t Device PCI Bus ID: {}", print_empty_string(device_pci_bus_id))?; + writeln!(output, "\t Type: {device_type:?}")?; + writeln!(output, "\t Driver: {}", print_empty_string(driver))?; + writeln!(output, "\t Driver Info: {}", print_empty_string(driver_info))?; + writeln!(output, "\t Subgroup Min Size: {subgroup_min_size}")?; + writeln!(output, "\t Subgroup Max Size: {subgroup_max_size}")?; + writeln!(output, "\tTransient Saves Memory: {transient_saves_memory}")?; + writeln!(output, "\t WebGPU Compliant: {:?}", downlevel.is_webgpu_compliant())?; if matches!(verbosity, PrintingVerbosity::Information) { return Ok(()); @@ -157,8 +175,6 @@ fn print_adapter(output: &mut impl io::Write, report: &AdapterReport, idx: usize max_compute_workgroup_size_y, max_compute_workgroup_size_z, max_compute_workgroups_per_dimension, - min_subgroup_size, - max_subgroup_size, max_immediate_size, max_non_sampler_bindings, @@ -195,8 +211,6 @@ fn print_adapter(output: &mut impl io::Write, report: &AdapterReport, idx: usize writeln!(output, "\t\t Max Vertex Buffers: {max_vertex_buffers}")?; writeln!(output, "\t\t Max Vertex Attributes: {max_vertex_attributes}")?; writeln!(output, "\t\t Max Vertex Buffer Array Stride: {max_vertex_buffer_array_stride}")?; - writeln!(output, "\t\t Min Subgroup Size: {min_subgroup_size}")?; - writeln!(output, "\t\t Max Subgroup Size: {max_subgroup_size}")?; writeln!(output, "\t\t Max Immediate data Size: {max_immediate_size}")?; writeln!(output, "\t\t Min Uniform Buffer Offset Alignment: {min_uniform_buffer_offset_alignment}")?; writeln!(output, "\t\t Min Storage Buffer Offset Alignment: {min_storage_buffer_offset_alignment}")?; diff --git a/wgpu-types/src/adapter.rs b/wgpu-types/src/adapter.rs index a23d9cba43f..501011a13e0 100644 --- a/wgpu-types/src/adapter.rs +++ b/wgpu-types/src/adapter.rs @@ -132,6 +132,32 @@ pub struct AdapterInfo { pub driver_info: String, /// Backend used for device pub backend: Backend, + /// Minimum possible size of a subgroup on this adapter. Will + /// never be lower than [`crate::MINIMUM_SUBGROUP_MIN_SIZE`]. + /// + /// This will vary from device to device. Typical values are listed below. + /// + /// - NVIDIA: 32 + /// - AMD GCN/Vega: 64 + /// - AMD RDNA+: 32 + /// - Intel: 8 or 16 + /// - Qualcomm: 64 + /// - WARP: 4 + /// - lavapipe: 8 + pub subgroup_min_size: u32, + /// Maximum possible size of a subgroup on this adapter. Will + /// never be higher than [`crate::MAXIMUM_SUBGROUP_MAX_SIZE`]. + /// + /// This will vary from device to device. Typical values are listed below: + /// + /// - NVIDIA: 32 + /// - AMD GCN/Vega: 64 + /// - AMD RDNA+: 64 + /// - Intel: 16 or 32 + /// - Qualcomm: 128 + /// - WARP: 4 or 128 + /// - lavapipe: 8 + pub subgroup_max_size: u32, /// If true, adding [`TextureUsages::TRANSIENT`] to a texture will decrease memory usage. pub transient_saves_memory: bool, } diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 302d6ff0c9b..980013ff1b9 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -207,6 +207,17 @@ pub const QUERY_SET_MAX_QUERIES: u32 = 4096; #[doc = link_to_wgpu_docs!(["query"]: "struct.QuerySet.html")] pub const QUERY_SIZE: u32 = 8; +/// The minimum allowed value for [`AdapterInfo::subgroup_min_size`]. +/// +/// See +/// where you can always use these values on all devices +pub const MINIMUM_SUBGROUP_MIN_SIZE: u32 = 4; +/// The maximum allowed value for [`AdapterInfo::subgroup_max_size`]. +/// +/// See +/// where you can always use these values on all devices. +pub const MAXIMUM_SUBGROUP_MAX_SIZE: u32 = 128; + /// Passed to `Device::poll` to control how and if it should block. #[derive(Clone, Debug)] pub enum PollType { diff --git a/wgpu-types/src/limits.rs b/wgpu-types/src/limits.rs index 3527e391865..48e6af89460 100644 --- a/wgpu-types/src/limits.rs +++ b/wgpu-types/src/limits.rs @@ -56,9 +56,6 @@ macro_rules! with_limits { $macro_name!(max_compute_workgroup_size_z, Ordering::Less); $macro_name!(max_compute_workgroups_per_dimension, Ordering::Less); - $macro_name!(min_subgroup_size, Ordering::Greater); - $macro_name!(max_subgroup_size, Ordering::Less); - $macro_name!(max_immediate_size, Ordering::Less); $macro_name!(max_non_sampler_bindings, Ordering::Less); @@ -214,10 +211,6 @@ pub struct Limits { /// Defaults to 65535. Higher is "better". pub max_compute_workgroups_per_dimension: u32, - /// Minimal number of invocations in a subgroup. Lower is "better". - pub min_subgroup_size: u32, - /// Maximal number of invocations in a subgroup. Higher is "better". - pub max_subgroup_size: u32, /// Amount of storage available for immediates in bytes. Defaults to 0. Higher is "better". /// Requesting more than 0 during device creation requires [`Features::IMMEDIATES`] to be enabled. /// @@ -316,8 +309,6 @@ impl Limits { /// max_compute_workgroup_size_y: 256, /// max_compute_workgroup_size_z: 64, /// max_compute_workgroups_per_dimension: 65535, - /// min_subgroup_size: 0, - /// max_subgroup_size: 0, /// max_immediate_size: 0, /// max_non_sampler_bindings: 1_000_000, /// max_task_workgroup_total_count: 0, @@ -369,8 +360,6 @@ impl Limits { max_compute_workgroup_size_y: 256, max_compute_workgroup_size_z: 64, max_compute_workgroups_per_dimension: 65535, - min_subgroup_size: 0, - max_subgroup_size: 0, max_immediate_size: 0, max_non_sampler_bindings: 1_000_000, @@ -414,8 +403,6 @@ impl Limits { /// max_vertex_buffers: 8, /// max_vertex_attributes: 16, /// max_vertex_buffer_array_stride: 2048, - /// min_subgroup_size: 0, - /// max_subgroup_size: 0, /// max_immediate_size: 0, /// min_uniform_buffer_offset_alignment: 256, /// min_storage_buffer_offset_alignment: 256, @@ -491,8 +478,6 @@ impl Limits { /// max_vertex_buffers: 8, /// max_vertex_attributes: 16, /// max_vertex_buffer_array_stride: 255, // + - /// min_subgroup_size: 0, - /// max_subgroup_size: 0, /// max_immediate_size: 0, /// min_uniform_buffer_offset_alignment: 256, /// min_storage_buffer_offset_alignment: 256, @@ -536,8 +521,6 @@ impl Limits { max_compute_workgroup_size_y: 0, max_compute_workgroup_size_z: 0, max_compute_workgroups_per_dimension: 0, - min_subgroup_size: 0, - max_subgroup_size: 0, // Value supported by Intel Celeron B830 on Windows (OpenGL 3.1) max_inter_stage_shader_components: 31, @@ -647,12 +630,7 @@ impl Limits { macro_rules! check_with_fail_fn { ($name:ident, $ordering:expr) => { let invalid_ord = $ordering.reverse(); - // In the case of `min_subgroup_size`, requesting a value of - // zero means "I'm not going to use subgroups", so we have to - // special case that. If any of our minimum limits could - // meaningfully go all the way to zero, that would conflict with - // this. - if self.$name != 0 && self.$name.cmp(&allowed.$name) == invalid_ord { + if self.$name.cmp(&allowed.$name) == invalid_ord { fail_fn(stringify!($name), self.$name as u64, allowed.$name as u64); if fatal { return; @@ -661,13 +639,6 @@ impl Limits { }; } - if self.min_subgroup_size > self.max_subgroup_size { - fail_fn( - "max_subgroup_size", - self.min_subgroup_size as u64, - allowed.min_subgroup_size as u64, - ); - } with_limits!(check_with_fail_fn); } diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index 36e755ab209..0c92b936418 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -818,9 +818,6 @@ fn map_wgt_limits(limits: webgpu_sys::GpuSupportedLimits) -> wgt::Limits { max_compute_workgroup_size_y: limits.max_compute_workgroup_size_y(), max_compute_workgroup_size_z: limits.max_compute_workgroup_size_z(), max_compute_workgroups_per_dimension: limits.max_compute_workgroups_per_dimension(), - // The following are not part of WebGPU - min_subgroup_size: wgt::Limits::default().min_subgroup_size, - max_subgroup_size: wgt::Limits::default().max_subgroup_size, max_immediate_size: wgt::Limits::default().max_immediate_size, max_non_sampler_bindings: wgt::Limits::default().max_non_sampler_bindings, max_inter_stage_shader_components: wgt::Limits::default().max_inter_stage_shader_components, @@ -1716,6 +1713,8 @@ impl dispatch::AdapterInterface for WebAdapter { driver: String::new(), driver_info: String::new(), backend: wgt::Backend::BrowserWebGpu, + subgroup_min_size: wgt::MINIMUM_SUBGROUP_MIN_SIZE, + subgroup_max_size: wgt::MAXIMUM_SUBGROUP_MAX_SIZE, transient_saves_memory: false, } } diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index 27935433578..7f0fe518f4b 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -150,8 +150,9 @@ pub use wgt::{ TextureFormat, TextureFormatFeatureFlags, TextureFormatFeatures, TextureSampleType, TextureTransition, TextureUsages, TextureUses, TextureViewDimension, Trace, VertexAttribute, VertexFormat, VertexStepMode, WasmNotSend, WasmNotSendSync, WasmNotSync, COPY_BUFFER_ALIGNMENT, - COPY_BYTES_PER_ROW_ALIGNMENT, IMMEDIATES_ALIGNMENT, MAP_ALIGNMENT, - QUERY_RESOLVE_BUFFER_ALIGNMENT, QUERY_SET_MAX_QUERIES, QUERY_SIZE, VERTEX_ALIGNMENT, + COPY_BYTES_PER_ROW_ALIGNMENT, IMMEDIATES_ALIGNMENT, MAP_ALIGNMENT, MAXIMUM_SUBGROUP_MAX_SIZE, + MINIMUM_SUBGROUP_MIN_SIZE, QUERY_RESOLVE_BUFFER_ALIGNMENT, QUERY_SET_MAX_QUERIES, QUERY_SIZE, + VERTEX_ALIGNMENT, }; #[expect(deprecated)]