Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,23 @@ event happens. Our new log policy is as follows:

By @cwfitzgerald in [#8579](https://github.com/gfx-rs/wgpu/pull/8579).

#### `subgroup_{min,max}_size` renamed and moved from `Limits` -> `AdapterInfo`

To bring our code in line with the WebGPU spec, we have moved information about subgroup size
from limits to adapter info. Limits was not the correct place for this anyway, and we had some
code special casing those limits.

Additionally we have renamed the fields to match the spec.

```diff
- let min = limits.min_subgroup_size;
+ let min = info.subgroup_min_size;
- let max = limits.max_subgroup_size;
+ let max = info.subgroup_max_size;
```

By @cwfitzgerald in [#8609](https://github.com/gfx-rs/wgpu/pull/8609).

### New Features

- Added support for transient textures on Vulkan and Metal. By @opstic in [#8247](https://github.com/gfx-rs/wgpu/pull/8247)
Expand Down
13 changes: 3 additions & 10 deletions deno_webgpu/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,8 @@ impl GPUAdapter {
fn info(&self, scope: &mut v8::HandleScope) -> v8::Global<v8::Object> {
self.info.get(scope, |_| {
let info = self.instance.adapter_get_info(self.id);
let limits = self.instance.adapter_limits(self.id);

GPUAdapterInfo {
info,
subgroup_min_size: limits.min_subgroup_size,
subgroup_max_size: limits.max_subgroup_size,
}
GPUAdapterInfo { info }
})
}

Expand Down Expand Up @@ -429,8 +424,6 @@ impl GPUSupportedFeatures {

pub struct GPUAdapterInfo {
pub info: wgpu_types::AdapterInfo,
pub subgroup_min_size: u32,
pub subgroup_max_size: u32,
}

impl GarbageCollected for GPUAdapterInfo {
Expand Down Expand Up @@ -473,12 +466,12 @@ impl GPUAdapterInfo {

#[getter]
fn subgroup_min_size(&self) -> u32 {
self.subgroup_min_size
self.info.subgroup_min_size
}

#[getter]
fn subgroup_max_size(&self) -> u32 {
self.subgroup_max_size
self.info.subgroup_max_size
}

#[getter]
Expand Down
7 changes: 1 addition & 6 deletions deno_webgpu/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,13 +119,8 @@ impl GPUDevice {
) -> v8::Global<v8::Object> {
self.adapter_info.get(scope, |_| {
let info = self.instance.adapter_get_info(self.adapter);
let limits = self.instance.adapter_limits(self.adapter);

GPUAdapterInfo {
info,
subgroup_min_size: limits.min_subgroup_size,
subgroup_max_size: limits.max_subgroup_size,
}
GPUAdapterInfo { info }
})
}

Expand Down
2 changes: 2 additions & 0 deletions wgpu-core/src/pipeline_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,8 @@ mod tests {
driver: String::new(),
driver_info: String::new(),
backend: wgt::Backend::Vulkan,
subgroup_min_size: 32,
subgroup_max_size: 32,
transient_saves_memory: true,
};

Expand Down
22 changes: 11 additions & 11 deletions wgpu-hal/src/dx12/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,15 @@ impl super::Adapter {
}
.unwrap();

let mut features1 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS1::default();
let hr = unsafe {
device.CheckFeatureSupport(
Direct3D12::D3D12_FEATURE_D3D12_OPTIONS1,
<*mut _>::cast(&mut features1),
size_of_val(&features1) as u32,
)
};

let driver_version = unsafe { adapter.CheckInterfaceSupport(&Dxgi::IDXGIDevice::IID) }
.ok()
.map(|i| {
Expand Down Expand Up @@ -156,6 +165,8 @@ impl super::Adapter {
driver_version.0, driver_version.1, driver_version.2, driver_version.3
),
driver_info: String::new(),
subgroup_min_size: features1.WaveLaneCountMin,
subgroup_max_size: features1.WaveLaneCountMax,
transient_saves_memory: false,
};

Expand Down Expand Up @@ -483,15 +494,6 @@ impl super::Adapter {
};
features.set(wgt::Features::TEXTURE_FORMAT_P010, p010_format_supported);

let mut features1 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS1::default();
let hr = unsafe {
device.CheckFeatureSupport(
Direct3D12::D3D12_FEATURE_D3D12_OPTIONS1,
<*mut _>::cast(&mut features1),
size_of_val(&features1) as u32,
)
};

features.set(
wgt::Features::SHADER_INT64,
shader_model >= naga::back::hlsl::ShaderModel::V6_0
Expand Down Expand Up @@ -694,8 +696,6 @@ impl super::Adapter {
.min(crate::MAX_VERTEX_BUFFERS as u32),
max_vertex_attributes: Direct3D12::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT,
max_vertex_buffer_array_stride: Direct3D12::D3D12_SO_BUFFER_MAX_STRIDE_IN_BYTES,
min_subgroup_size: 4, // Not using `features1.WaveLaneCountMin` as it is unreliable
max_subgroup_size: 128,
// The immediates are part of the root signature which
// has a limit of 64 DWORDS (256 bytes), but other resources
// also share the root signature:
Expand Down
4 changes: 2 additions & 2 deletions wgpu-hal/src/gles/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ impl super::Adapter {
device_pci_bus_id: String::new(),
driver_info: version,
backend: wgt::Backend::Gl,
subgroup_min_size: wgt::MINIMUM_SUBGROUP_MIN_SIZE,
subgroup_max_size: wgt::MAXIMUM_SUBGROUP_MAX_SIZE,
transient_saves_memory: false,
}
}
Expand Down Expand Up @@ -751,8 +753,6 @@ impl super::Adapter {
} else {
!0
},
min_subgroup_size: 0,
max_subgroup_size: 0,
max_immediate_size: super::MAX_IMMEDIATES as u32 * 4,
min_uniform_buffer_offset_alignment,
min_storage_buffer_offset_alignment,
Expand Down
2 changes: 0 additions & 2 deletions wgpu-hal/src/metal/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1100,8 +1100,6 @@ impl super::PrivateCapabilities {
max_vertex_buffers: self.max_vertex_buffers,
max_vertex_attributes: 31,
max_vertex_buffer_array_stride: base.max_vertex_buffer_array_stride,
min_subgroup_size: 4,
max_subgroup_size: 64,
max_immediate_size: 0x1000,
min_uniform_buffer_offset_alignment: self.buffer_alignment as u32,
min_storage_buffer_offset_alignment: self.buffer_alignment as u32,
Expand Down
6 changes: 6 additions & 0 deletions wgpu-hal/src/metal/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,12 @@ impl crate::Instance for Instance {
driver: String::new(),
driver_info: String::new(),
backend: wgt::Backend::Metal,
// These are hardcoded based on typical values for Metal devices
//
// See <https://github.com/gpuweb/gpuweb/blob/main/proposals/subgroups.md#adapter-info>
// for more information.
subgroup_min_size: 4,
subgroup_max_size: 64,
transient_saves_memory: shared.private_caps.supports_memoryless_storage,
},
features: shared.private_caps.features(),
Expand Down
4 changes: 2 additions & 2 deletions wgpu-hal/src/noop/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ pub fn adapter_info() -> wgt::AdapterInfo {
driver: String::from("wgpu"),
driver_info: String::new(),
backend: wgt::Backend::Noop,
subgroup_min_size: wgt::MINIMUM_SUBGROUP_MIN_SIZE,
subgroup_max_size: wgt::MAXIMUM_SUBGROUP_MAX_SIZE,
transient_saves_memory: false,
}
}
Expand Down Expand Up @@ -189,8 +191,6 @@ pub const CAPABILITIES: crate::Capabilities = {
max_compute_workgroup_size_y: ALLOC_MAX_U32,
max_compute_workgroup_size_z: ALLOC_MAX_U32,
max_compute_workgroups_per_dimension: ALLOC_MAX_U32,
min_subgroup_size: 1,
max_subgroup_size: ALLOC_MAX_U32,
max_immediate_size: ALLOC_MAX_U32,
max_non_sampler_bindings: ALLOC_MAX_U32,

Expand Down
16 changes: 8 additions & 8 deletions wgpu-hal/src/vulkan/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1331,14 +1331,6 @@ impl PhysicalDeviceProperties {
.min(crate::MAX_VERTEX_BUFFERS as u32),
max_vertex_attributes: limits.max_vertex_input_attributes,
max_vertex_buffer_array_stride: limits.max_vertex_input_binding_stride,
min_subgroup_size: self
.subgroup_size_control
.map(|subgroup_size| subgroup_size.min_subgroup_size)
.unwrap_or(0),
max_subgroup_size: self
.subgroup_size_control
.map(|subgroup_size| subgroup_size.max_subgroup_size)
.unwrap_or(0),
max_immediate_size: limits.max_push_constants_size,
min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment as u32,
min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment as u32,
Expand Down Expand Up @@ -1776,6 +1768,14 @@ impl super::Instance {
.to_owned()
},
backend: wgt::Backend::Vulkan,
subgroup_min_size: phd_capabilities
.subgroup_size_control
.map(|subgroup_size| subgroup_size.min_subgroup_size)
.unwrap_or(wgt::MINIMUM_SUBGROUP_MIN_SIZE),
subgroup_max_size: phd_capabilities
.subgroup_size_control
.map(|subgroup_size| subgroup_size.max_subgroup_size)
.unwrap_or(wgt::MAXIMUM_SUBGROUP_MAX_SIZE),
transient_saves_memory: supports_lazily_allocated,
};
let (available_features, mut downlevel_flags) =
Expand Down
40 changes: 27 additions & 13 deletions wgpu-info/src/human.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::io;

use bitflags::Flags;
use wgpu::AdapterInfo;

use crate::{
report::{AdapterReport, GpuReport},
Expand Down Expand Up @@ -89,21 +90,38 @@ fn print_adapter(output: &mut impl io::Write, report: &AdapterReport, idx: usize
// Adapter Info //
//////////////////

let AdapterInfo {
name,
vendor,
device,
device_type,
device_pci_bus_id,
driver,
driver_info,
backend,
subgroup_min_size,
subgroup_max_size,
transient_saves_memory,
} = info;

if matches!(verbosity, PrintingVerbosity::NameOnly) {
writeln!(output, "Adapter {idx}: {} ({:?})", info.name, info.backend)?;
return Ok(());
}

writeln!(output, "Adapter {idx}:")?;
writeln!(output, "\t Backend: {:?}", info.backend)?;
writeln!(output, "\t Name: {}", info.name)?;
writeln!(output, "\t VendorID: {:#X?}", info.vendor)?;
writeln!(output, "\t DeviceID: {:#X?}", info.device)?;
writeln!(output, "\t DevicePCIBusId: {}", print_empty_string(&info.device_pci_bus_id))?;
writeln!(output, "\t Type: {:?}", info.device_type)?;
writeln!(output, "\t Driver: {}", print_empty_string(&info.driver))?;
writeln!(output, "\t DriverInfo: {}", print_empty_string(&info.driver_info))?;
writeln!(output, "\tWebGPU Compliant: {:?}", downlevel.is_webgpu_compliant())?;
writeln!(output, "\t Backend: {backend:?}")?;
writeln!(output, "\t Name: {name}")?;
writeln!(output, "\t Vendor ID: {vendor:#X?}")?;
writeln!(output, "\t Device ID: {device:#X?}")?;
writeln!(output, "\t Device PCI Bus ID: {}", print_empty_string(device_pci_bus_id))?;
writeln!(output, "\t Type: {device_type:?}")?;
writeln!(output, "\t Driver: {}", print_empty_string(driver))?;
writeln!(output, "\t Driver Info: {}", print_empty_string(driver_info))?;
writeln!(output, "\t Subgroup Min Size: {subgroup_min_size}")?;
writeln!(output, "\t Subgroup Max Size: {subgroup_max_size}")?;
writeln!(output, "\tTransient Saves Memory: {transient_saves_memory}")?;
writeln!(output, "\t WebGPU Compliant: {:?}", downlevel.is_webgpu_compliant())?;

if matches!(verbosity, PrintingVerbosity::Information) {
return Ok(());
Expand Down Expand Up @@ -157,8 +175,6 @@ fn print_adapter(output: &mut impl io::Write, report: &AdapterReport, idx: usize
max_compute_workgroup_size_y,
max_compute_workgroup_size_z,
max_compute_workgroups_per_dimension,
min_subgroup_size,
max_subgroup_size,
max_immediate_size,
max_non_sampler_bindings,

Expand Down Expand Up @@ -195,8 +211,6 @@ fn print_adapter(output: &mut impl io::Write, report: &AdapterReport, idx: usize
writeln!(output, "\t\t Max Vertex Buffers: {max_vertex_buffers}")?;
writeln!(output, "\t\t Max Vertex Attributes: {max_vertex_attributes}")?;
writeln!(output, "\t\t Max Vertex Buffer Array Stride: {max_vertex_buffer_array_stride}")?;
writeln!(output, "\t\t Min Subgroup Size: {min_subgroup_size}")?;
writeln!(output, "\t\t Max Subgroup Size: {max_subgroup_size}")?;
writeln!(output, "\t\t Max Immediate data Size: {max_immediate_size}")?;
writeln!(output, "\t\t Min Uniform Buffer Offset Alignment: {min_uniform_buffer_offset_alignment}")?;
writeln!(output, "\t\t Min Storage Buffer Offset Alignment: {min_storage_buffer_offset_alignment}")?;
Expand Down
26 changes: 26 additions & 0 deletions wgpu-types/src/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,32 @@ pub struct AdapterInfo {
pub driver_info: String,
/// Backend used for device
pub backend: Backend,
/// Minimum possible size of a subgroup on this adapter. Will
/// never be lower than [`crate::MINIMUM_SUBGROUP_MIN_SIZE`].
///
/// This will vary from device to device. Typical values are listed below.
///
/// - NVIDIA: 32
/// - AMD GCN/Vega: 64
/// - AMD RDNA+: 32
/// - Intel: 8 or 16
/// - Qualcomm: 64
/// - WARP: 4
/// - lavapipe: 8
pub subgroup_min_size: u32,
/// Maximum possible size of a subgroup on this adapter. Will
/// never be higher than [`crate::MAXIMUM_SUBGROUP_MAX_SIZE`].
///
/// This will vary from device to device. Typical values are listed below:
///
/// - NVIDIA: 32
/// - AMD GCN/Vega: 64
/// - AMD RDNA+: 64
/// - Intel: 16 or 32
/// - Qualcomm: 128
/// - WARP: 4 or 128
/// - lavapipe: 8
pub subgroup_max_size: u32,
/// If true, adding [`TextureUsages::TRANSIENT`] to a texture will decrease memory usage.
pub transient_saves_memory: bool,
}
Expand Down
11 changes: 11 additions & 0 deletions wgpu-types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,17 @@ pub const QUERY_SET_MAX_QUERIES: u32 = 4096;
#[doc = link_to_wgpu_docs!(["query"]: "struct.QuerySet.html")]
pub const QUERY_SIZE: u32 = 8;

/// The minimum allowed value for [`AdapterInfo::subgroup_min_size`].
///
/// See <https://gpuweb.github.io/gpuweb/#gpuadapterinfo>
/// where you can always use these values on all devices
pub const MINIMUM_SUBGROUP_MIN_SIZE: u32 = 4;
/// The maximum allowed value for [`AdapterInfo::subgroup_max_size`].
///
/// See <https://gpuweb.github.io/gpuweb/#gpuadapterinfo>
/// where you can always use these values on all devices.
pub const MAXIMUM_SUBGROUP_MAX_SIZE: u32 = 128;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Q: Is it possible to use more than this on some machines? If so, do we have any follow-up work for permitting larger groups?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No - the max observed subgroup size in the wild is 128 on qualcomm chips (and WARP). Similarly WARP supports the same range (4 to 128) so you can test with the full range of subgroups in the wild.


/// Passed to `Device::poll` to control how and if it should block.
#[derive(Clone, Debug)]
pub enum PollType<T> {
Expand Down
Loading
Loading