Skip to content
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ SamplerDescriptor {
- WebGPU device requests now support the required limits `maxColorAttachments` and `maxColorAttachmentBytesPerSample`. By @evilpie in [#8328](https://github.com/gfx-rs/wgpu/pull/8328)
- Reject binding indices that exceed `wgpu_types::Limits::max_bindings_per_bind_group` when deriving a bind group layout for a pipeline. By @jimblandy in [#8325](https://github.com/gfx-rs/wgpu/pull/8325).

#### DX12

- Align copies b/w textures and buffers via a single intermediate buffer per copy when `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported` is `false`. By @ErichDonGubler in [#7721](https://github.com/gfx-rs/wgpu/pull/7721).

Comment on lines +89 to +92
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

onuts, I almost forgot to move this out of the previous release section.

@cwfitzgerald: I'm strongly considering experimenting with adding some CI that fails a PR that adds things to the CHANGELOG outside of the Unreleased section, unless opted out by a label. Thoughts?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we don't have to make it a required check, either. It can simply be a signal that something might be wrong, at first.

## v27.0.2 (2025-10-03)

### Bug Fixes
Expand Down
11 changes: 10 additions & 1 deletion cts_runner/test.lst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@ fails-if(vulkan) webgpu:api,operation,command_buffer,copyTextureToTexture:copy_d
webgpu:api,operation,command_buffer,copyTextureToTexture:copy_depth_stencil:format="stencil8"
// Fails with OOM in CI.
fails-if(dx12) webgpu:api,operation,command_buffer,image_copy:offsets_and_sizes:*
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="FullCopyT2B";dimension="1d"
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="FullCopyT2B";dimension="2d"
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="FullCopyT2B";dimension="3d"
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="PartialCopyT2B";dimension="1d"
fails-if(dx12) webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="PartialCopyT2B";dimension="2d"
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's odd that webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="PartialCopyT2B";dimension="2d" still fails after this PR's fix. I suspect there's more work to do here to make this correct.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, it seems the full path that's failing is a single subtest exercising a 3rd dimension for the "2d" case: copySize=[3,3,2];origin=["_undef_","_undef_","_undef_"];bytesPerRow=256;rowsPerImage=3

Definitely follow-up work for something more edge-casey. Whew!

webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="PartialCopyT2B";dimension="3d"
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="CopyB2T";checkMethod="FullCopyT2B";dimension="1d"
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="CopyB2T";checkMethod="FullCopyT2B";dimension="2d"
fails-if(dx12,vulkan,metal) webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="CopyB2T";checkMethod="FullCopyT2B";dimension="3d"
webgpu:api,operation,compute,basic:memcpy:*
//FAIL: webgpu:api,operation,compute,basic:large_dispatch:*
webgpu:api,operation,compute_pipeline,overrides:*
Expand Down Expand Up @@ -157,4 +166,4 @@ webgpu:shader,validation,statement,statement_behavior:invalid_statements:body="l
webgpu:shader,validation,statement,statement_behavior:invalid_statements:body="loop8"
webgpu:shader,validation,statement,statement_behavior:invalid_statements:body="switch1"
//FAIL: 9 invalid_statements subtests due to https://github.com/gfx-rs/wgpu/issues/7733
webgpu:api,validation,render_pipeline,vertex_state:many_attributes_overlapping:*
webgpu:api,validation,render_pipeline,vertex_state:many_attributes_overlapping:*
12 changes: 1 addition & 11 deletions tests/tests/wgpu-gpu/regression/issue_6827.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,7 @@ static TEST_SCATTER: GpuTestConfiguration = GpuTestConfiguration::new()
.expect_fail(FailureCase::backend_adapter(
wgpu::Backends::METAL,
"Apple Paravirtual device", // CI on M1
))
.expect_fail(
// Unfortunately this depends on if `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported`
// is true, which we have no way to encode. This reproduces in CI though, so not too worried about it.
FailureCase::backend(wgpu::Backends::DX12)
.flaky()
.validation_error(
"D3D12_PLACED_SUBRESOURCE_FOOTPRINT::Offset must be a multiple of 512",
)
.panic("GraphicsCommandList::close failed: The parameter is incorrect"),
),
)),
)
.run_async(|ctx| async move { run_test(ctx, true).await });

Expand Down
16 changes: 16 additions & 0 deletions wgpu-hal/src/dx12/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,21 @@ impl super::Adapter {
.is_ok()
};

let unrestricted_buffer_texture_copy_pitch_supported = {
let mut features13 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS13::default();
unsafe {
device.CheckFeatureSupport(
Direct3D12::D3D12_FEATURE_D3D12_OPTIONS13,
<*mut _>::cast(&mut features13),
size_of_val(&features13) as u32,
)
}
.is_ok()
&& features13
.UnrestrictedBufferTextureCopyPitchSupported
.as_bool()
};

let mut max_sampler_descriptor_heap_size =
Direct3D12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE;
{
Expand Down Expand Up @@ -304,6 +319,7 @@ impl super::Adapter {
suballocation_supported: !info.name.contains("Iris(R) Xe"),
shader_model,
max_sampler_descriptor_heap_size,
unrestricted_buffer_texture_copy_pitch_supported,
};

// Theoretically vram limited, but in practice 2^20 is the limit
Expand Down
159 changes: 148 additions & 11 deletions wgpu-hal/src/dx12/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use crate::{
dxgi::{name::ObjectExt, result::HResult as _},
},
dx12::borrow_interface_temporarily,
AccelerationStructureEntries,
AccelerationStructureEntries, CommandEncoder as _,
};

fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> Direct3D12::D3D12_BOX {
Expand Down Expand Up @@ -312,6 +312,78 @@ impl super::CommandEncoder {
}
}
}

unsafe fn buf_tex_intermediate<T>(
&mut self,
region: crate::BufferTextureCopy,
tex_fmt: wgt::TextureFormat,
copy_op: impl FnOnce(&mut Self, &super::Buffer, wgt::BufferSize, crate::BufferTextureCopy) -> T,
) -> (T, super::Buffer) {
let size = {
let copy_info = region.buffer_layout.get_buffer_texture_copy_info(
tex_fmt,
region.texture_base.aspect.map(),
&region.size.into(),
);
copy_info.unwrap().bytes_in_copy
};

let size = wgt::BufferSize::new(size).unwrap();

let buffer = {
let (resource, allocation) =
super::suballocation::DeviceAllocationContext::from(&*self)
.create_buffer(&crate::BufferDescriptor {
label: None,
size: size.get(),
usage: wgt::BufferUses::COPY_SRC | wgt::BufferUses::COPY_DST,
memory_flags: crate::MemoryFlags::empty(),
})
.expect(concat!(
"internal error: ",
"failed to allocate intermediate buffer ",
"for offset alignment"
));
super::Buffer {
resource,
size: size.get(),
allocation,
}
};

let mut region = region;
region.buffer_layout.offset = 0;

unsafe {
self.transition_buffers(
[crate::BufferBarrier {
buffer: &buffer,
usage: crate::StateTransition {
from: wgt::BufferUses::empty(),
to: wgt::BufferUses::COPY_DST,
},
}]
.into_iter(),
)
};

let t = copy_op(self, &buffer, size, region);

unsafe {
self.transition_buffers(
[crate::BufferBarrier {
buffer: &buffer,
usage: crate::StateTransition {
from: wgt::BufferUses::COPY_DST,
to: wgt::BufferUses::COPY_SRC,
},
}]
.into_iter(),
)
};

(t, buffer)
}
}

impl crate::CommandEncoder for super::CommandEncoder {
Expand Down Expand Up @@ -366,6 +438,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
Ok(super::CommandBuffer { raw })
}
unsafe fn reset_all<I: Iterator<Item = super::CommandBuffer>>(&mut self, command_buffers: I) {
self.intermediate_copy_bufs.clear();
for cmd_buf in command_buffers {
self.free_lists.push(cmd_buf.raw);
}
Expand Down Expand Up @@ -612,30 +685,59 @@ impl crate::CommandEncoder for super::CommandEncoder {
) where
T: Iterator<Item = crate::BufferTextureCopy>,
{
let list = self.list.as_ref().unwrap();
for r in regions {
let offset_alignment = self.shared.private_caps.texture_data_placement_alignment();

for naive_copy_region in regions {
let is_offset_aligned = naive_copy_region.buffer_layout.offset % offset_alignment == 0;
let (final_copy_region, src) = if is_offset_aligned {
(naive_copy_region, src)
} else {
let (intermediate_to_dst_region, intermediate_buf) = unsafe {
let src_offset = naive_copy_region.buffer_layout.offset;
self.buf_tex_intermediate(
naive_copy_region,
dst.format,
|this, buf, size, intermediate_to_dst_region| {
let layout = crate::BufferCopy {
src_offset,
dst_offset: 0,
size,
};
this.copy_buffer_to_buffer(src, buf, [layout].into_iter());
intermediate_to_dst_region
},
)
};
self.intermediate_copy_bufs.push(intermediate_buf);
let intermediate_buf = self.intermediate_copy_bufs.last().unwrap();
(intermediate_to_dst_region, intermediate_buf)
};

let list = self.list.as_ref().unwrap();

let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION {
pResource: unsafe { borrow_interface_temporarily(&src.resource) },
Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 {
PlacedFootprint: r.to_subresource_footprint(dst.format),
PlacedFootprint: final_copy_region.to_subresource_footprint(dst.format),
},
};
let dst_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION {
pResource: unsafe { borrow_interface_temporarily(&dst.resource) },
Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 {
SubresourceIndex: dst.calc_subresource_for_copy(&r.texture_base),
SubresourceIndex: dst
.calc_subresource_for_copy(&final_copy_region.texture_base),
},
};

let src_box = make_box(&wgt::Origin3d::ZERO, &r.size);
let src_box = make_box(&wgt::Origin3d::ZERO, &final_copy_region.size);
unsafe {
list.CopyTextureRegion(
&dst_location,
r.texture_base.origin.x,
r.texture_base.origin.y,
r.texture_base.origin.z,
final_copy_region.texture_base.origin.x,
final_copy_region.texture_base.origin.y,
final_copy_region.texture_base.origin.z,
&src_location,
Some(&src_box),
)
Expand All @@ -652,8 +754,12 @@ impl crate::CommandEncoder for super::CommandEncoder {
) where
T: Iterator<Item = crate::BufferTextureCopy>,
{
let list = self.list.as_ref().unwrap();
for r in regions {
let copy_aligned = |this: &mut Self,
src: &super::Texture,
dst: &super::Buffer,
r: crate::BufferTextureCopy| {
let list = this.list.as_ref().unwrap();

let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION {
pResource: unsafe { borrow_interface_temporarily(&src.resource) },
Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
Expand All @@ -673,6 +779,37 @@ impl crate::CommandEncoder for super::CommandEncoder {
unsafe {
list.CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, Some(&src_box))
};
};

let offset_alignment = self.shared.private_caps.texture_data_placement_alignment();

for r in regions {
let is_offset_aligned = r.buffer_layout.offset % offset_alignment == 0;
if is_offset_aligned {
copy_aligned(self, src, dst, r)
} else {
let orig_offset = r.buffer_layout.offset;
let (intermediate_to_dst_region, src) = unsafe {
self.buf_tex_intermediate(
r,
src.format,
|this, buf, size, intermediate_region| {
copy_aligned(this, src, buf, intermediate_region);
crate::BufferCopy {
src_offset: 0,
dst_offset: orig_offset,
size,
}
},
)
};

unsafe {
self.copy_buffer_to_buffer(&src, dst, [intermediate_to_dst_region].into_iter());
}

self.intermediate_copy_bufs.push(src);
};
}
}

Expand Down
1 change: 1 addition & 0 deletions wgpu-hal/src/dx12/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,7 @@ impl crate::Device for super::Device {
mem_allocator: self.mem_allocator.clone(),
rtv_pool: Arc::clone(&self.rtv_pool),
temp_rtv_handles: Vec::new(),
intermediate_copy_bufs: Vec::new(),
null_rtv_handle: self.null_rtv_handle,
list: None,
free_lists: Vec::new(),
Expand Down
19 changes: 18 additions & 1 deletion wgpu-hal/src/dx12/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,11 @@ use windows::{
core::{Free, Interface},
Win32::{
Foundation,
Graphics::{Direct3D, Direct3D12, DirectComposition, Dxgi},
Graphics::{
Direct3D,
Direct3D12::{self, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT},
DirectComposition, Dxgi,
},
System::Threading,
},
};
Expand Down Expand Up @@ -601,6 +605,17 @@ struct PrivateCapabilities {
suballocation_supported: bool,
shader_model: naga::back::hlsl::ShaderModel,
max_sampler_descriptor_heap_size: u32,
unrestricted_buffer_texture_copy_pitch_supported: bool,
}

impl PrivateCapabilities {
fn texture_data_placement_alignment(&self) -> u64 {
if self.unrestricted_buffer_texture_copy_pitch_supported {
4
} else {
D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT.into()
}
}
}

#[derive(Default)]
Expand Down Expand Up @@ -840,6 +855,8 @@ pub struct CommandEncoder {
rtv_pool: Arc<Mutex<descriptor::CpuPool>>,
temp_rtv_handles: Vec<descriptor::Handle>,

intermediate_copy_bufs: Vec<Buffer>,

null_rtv_handle: descriptor::Handle,
list: Option<Direct3D12::ID3D12GraphicsCommandList>,
free_lists: Vec<Direct3D12::ID3D12GraphicsCommandList>,
Expand Down
30 changes: 30 additions & 0 deletions wgpu-hal/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2438,6 +2438,36 @@ pub struct CopyExtent {
pub depth: u32,
}

impl From<wgt::Extent3d> for CopyExtent {
fn from(value: wgt::Extent3d) -> Self {
let wgt::Extent3d {
width,
height,
depth_or_array_layers,
} = value;
Self {
width,
height,
depth: depth_or_array_layers,
}
}
}

impl From<CopyExtent> for wgt::Extent3d {
fn from(value: CopyExtent) -> Self {
let CopyExtent {
width,
height,
depth,
} = value;
Self {
width,
height,
depth_or_array_layers: depth,
}
}
}

#[derive(Clone, Debug)]
pub struct TextureCopy {
pub src_base: TextureCopyBase,
Expand Down