diff --git a/CHANGELOG.md b/CHANGELOG.md
index 76b78382d68..21e129ec6d0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -52,6 +52,8 @@ Bottom level categories:
 
 - Added `Buffer` methods corresponding to `BufferSlice` methods, so you can skip creating a `BufferSlice` when it offers no benefit, and `BufferSlice::slice()` for sub-slicing a slice. By @kpreid in [#7123](https://github.com/gfx-rs/wgpu/pull/7123).
+- Add `util::StagingBelt::allocate()` so the staging belt can be used to write textures. By @kpreid in [#6900](https://github.com/gfx-rs/wgpu/pull/6900).
+
 
 #### Naga
 
 - Support @must_use attribute on function declarations. By @turbocrime in [#6801](https://github.com/gfx-rs/wgpu/pull/6801).
diff --git a/wgpu/src/util/belt.rs b/wgpu/src/util/belt.rs
index d6ef7a0c466..90d51d7f489 100644
--- a/wgpu/src/util/belt.rs
+++ b/wgpu/src/util/belt.rs
@@ -3,51 +3,29 @@ use crate::{
     BufferViewMut, CommandEncoder, Device, MapMode,
 };
 use std::fmt;
-use std::sync::{mpsc, Arc};
-
-struct Chunk {
-    buffer: Arc<Buffer>,
-    size: BufferAddress,
-    offset: BufferAddress,
-}
-
-/// `Sync` wrapper that works by providing only exclusive access.
-///
-/// See https://doc.rust-lang.org/nightly/std/sync/struct.Exclusive.html
-struct Exclusive<T>(T);
-
-unsafe impl<T> Sync for Exclusive<T> {}
-
-impl<T> Exclusive<T> {
-    fn new(value: T) -> Self {
-        Self(value)
-    }
-
-    fn get_mut(&mut self) -> &mut T {
-        &mut self.0
-    }
-}
+use std::sync::mpsc;
 
 /// Efficiently performs many buffer writes by sharing and reusing temporary buffers.
 ///
 /// Internally it uses a ring-buffer of staging buffers that are sub-allocated.
-/// It has an advantage over [`Queue::write_buffer()`] in a way that it returns a mutable slice,
-/// which you can fill to avoid an extra data copy.
+/// Its advantage over [`Queue::write_buffer_with()`] is that the individual allocations
+/// are cheaper; `StagingBelt` is most useful when you are writing very many small pieces
+/// of data. It can be understood as a sort of arena allocator.
 ///
 /// Using a staging belt is slightly complicated, and generally goes as follows:
-/// 1. Write to buffers that need writing to using [`StagingBelt::write_buffer()`].
+/// 1. Use [`StagingBelt::write_buffer()`] or [`StagingBelt::allocate()`] to allocate
+///    buffer slices, then write your data to them.
 /// 2. Call [`StagingBelt::finish()`].
 /// 3. Submit all command encoders that were used in step 1.
 /// 4. Call [`StagingBelt::recall()`].
 ///
-/// [`Queue::write_buffer()`]: crate::Queue::write_buffer
+/// [`Queue::write_buffer_with()`]: crate::Queue::write_buffer_with
 pub struct StagingBelt {
     chunk_size: BufferAddress,
     /// Chunks into which we are accumulating data to be transferred.
     active_chunks: Vec<Chunk>,
     /// Chunks that have scheduled transfers already; they are unmapped and some
-    /// command encoder has one or more `copy_buffer_to_buffer` commands with them
-    /// as source.
+    /// command encoder has one or more commands with them as source.
     closed_chunks: Vec<Chunk>,
     /// Chunks that are back from the GPU and ready to be mapped for write and put
     /// into `active_chunks`.
@@ -70,7 +48,7 @@ impl StagingBelt {
     /// (per [`StagingBelt::finish()`]); and
     /// * bigger is better, within these bounds.
     pub fn new(chunk_size: BufferAddress) -> Self {
-        let (sender, receiver) = std::sync::mpsc::channel();
+        let (sender, receiver) = mpsc::channel();
         StagingBelt {
             chunk_size,
             active_chunks: Vec::new(),
@@ -81,7 +59,7 @@ impl StagingBelt {
         }
     }
 
-    /// Allocate the staging belt slice of `size` to be uploaded into the `target` buffer
+    /// Allocate a staging belt slice of `size` to be copied into the `target` buffer
     /// at the specified offset.
     ///
    /// The upload will be placed into the provided command encoder. This encoder
@@ -99,10 +77,61 @@ impl StagingBelt {
         size: BufferSize,
         device: &Device,
     ) -> BufferViewMut<'_> {
+        let (mapped, belt_buffer, offset_in_belt_buffer) = self.allocate(
+            size,
+            const { BufferSize::new(crate::COPY_BUFFER_ALIGNMENT).unwrap() },
+            device,
+        );
+        encoder.copy_buffer_to_buffer(
+            belt_buffer,
+            offset_in_belt_buffer,
+            target,
+            offset,
+            size.get(),
+        );
+        mapped
+    }
+
+    /// Allocate a staging belt slice with the given `size` and `alignment` and return it.
+    ///
+    /// This allows you to do whatever you want with the slice after, such as
+    /// copying it to a texture or executing a compute shader that reads it, whereas
+    /// [`StagingBelt::write_buffer()`] can only write to other buffers.
+    ///
+    /// If the `size` is greater than the space available in any free internal buffer, a new buffer
+    /// will be allocated for it. Therefore, the `chunk_size` passed to [`StagingBelt::new()`]
+    /// should ideally be larger than every such size.
+    ///
+    /// The chosen slice will be positioned within the buffer at a multiple of `alignment`,
+    /// which may be used to meet alignment requirements for the operation you wish to perform
+    /// with the slice. This does not necessarily affect the alignment of the mapping.
+    ///
+    /// Three values are returned:
+    ///
+    /// * The mapped buffer view which you should write into from the CPU side (Rust code).
+    /// * The buffer containing the slice, for you to use in GPU commands.
+    ///   All commands involving this slice must be submitted after
+    ///   [`StagingBelt::finish()`] is called and before [`StagingBelt::recall()`] is called.
+    /// * The offset within the buffer at which the slice starts.
+    ///   This offset should be used for all GPU commands, and should not be used with
+    ///   the mapped buffer view.
+    pub fn allocate(
+        &mut self,
+        size: BufferSize,
+        alignment: BufferSize,
+        device: &Device,
+    ) -> (BufferViewMut<'_>, &Buffer, BufferAddress) {
+        assert!(
+            alignment.get().is_power_of_two(),
+            "alignment must be a power of two, not {alignment}"
+        );
+        // At minimum, we must have alignment sufficient to map the buffer.
+        let alignment = alignment.get().max(crate::MAP_ALIGNMENT);
+
         let mut chunk = if let Some(index) = self
             .active_chunks
             .iter()
-            .position(|chunk| chunk.offset + size.get() <= chunk.size)
+            .position(|chunk| chunk.can_allocate(size, alignment))
         {
             self.active_chunks.swap_remove(index)
         } else {
@@ -111,35 +140,35 @@ impl StagingBelt {
             if let Some(index) = self
                 .free_chunks
                 .iter()
-                .position(|chunk| size.get() <= chunk.size)
+                .position(|chunk| chunk.can_allocate(size, alignment))
             {
                 self.free_chunks.swap_remove(index)
             } else {
-                let size = self.chunk_size.max(size.get());
                 Chunk {
-                    buffer: Arc::new(device.create_buffer(&BufferDescriptor {
+                    buffer: device.create_buffer(&BufferDescriptor {
                         label: Some("(wgpu internal) StagingBelt staging buffer"),
-                        size,
+                        size: self.chunk_size.max(size.get()),
                         usage: BufferUsages::MAP_WRITE | BufferUsages::COPY_SRC,
                         mapped_at_creation: true,
-                    })),
-                    size,
+                    }),
                     offset: 0,
                 }
             }
         };
 
-        encoder.copy_buffer_to_buffer(&chunk.buffer, chunk.offset, target, offset, size.get());
-        let old_offset = chunk.offset;
-        chunk.offset = align_to(chunk.offset + size.get(), crate::MAP_ALIGNMENT);
+        let allocation_offset = chunk.allocate(size, alignment);
 
         self.active_chunks.push(chunk);
-        self.active_chunks
-            .last()
-            .unwrap()
-            .buffer
-            .slice(old_offset..old_offset + size.get())
-            .get_mapped_range_mut()
+        let chunk = self.active_chunks.last().unwrap();
+
+        (
+            chunk
+                .buffer
+                .slice(allocation_offset..allocation_offset + size.get())
+                .get_mapped_range_mut(),
+            &chunk.buffer,
+            allocation_offset,
+        )
     }
 
     /// Prepare currently mapped buffers for use in a submission.
@@ -197,3 +226,47 @@ impl fmt::Debug for StagingBelt {
             .finish_non_exhaustive()
     }
 }
+
+struct Chunk {
+    buffer: Buffer,
+    offset: BufferAddress,
+}
+
+impl Chunk {
+    fn can_allocate(&self, size: BufferSize, alignment: BufferAddress) -> bool {
+        let alloc_start = align_to(self.offset, alignment);
+        let alloc_end = alloc_start + size.get();
+
+        alloc_end <= self.buffer.size()
+    }
+
+    fn allocate(&mut self, size: BufferSize, alignment: BufferAddress) -> BufferAddress {
+        let alloc_start = align_to(self.offset, alignment);
+        let alloc_end = alloc_start + size.get();
+
+        assert!(alloc_end <= self.buffer.size());
+        self.offset = alloc_end;
+        alloc_start
+    }
+}
+
+use exclusive::Exclusive;
+mod exclusive {
+    /// `Sync` wrapper that works by providing only exclusive access.
+    ///
+    /// See <https://doc.rust-lang.org/nightly/std/sync/struct.Exclusive.html>
+    pub(super) struct Exclusive<T>(T);
+
+    /// Safety: `&Exclusive` has no operations.
+    unsafe impl<T> Sync for Exclusive<T> {}
+
+    impl<T> Exclusive<T> {
+        pub fn new(value: T) -> Self {
+            Self(value)
+        }
+
+        pub fn get_mut(&mut self) -> &mut T {
+            &mut self.0
+        }
+    }
+}
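
For reference, a minimal sketch of the four-step flow described in the `StagingBelt` docs above, using the existing `write_buffer()` path. The names `device`, `queue`, `belt`, `vertex_buffer`, and `data` are assumptions for illustration and are not part of this patch; `data` is assumed to be non-empty and a multiple of `COPY_BUFFER_ALIGNMENT` bytes long.

```rust
// Hypothetical setup: `device: wgpu::Device`, `queue: wgpu::Queue`,
// `belt: wgpu::util::StagingBelt`, `vertex_buffer: wgpu::Buffer` (COPY_DST),
// and `data: Vec<u8>` already exist.
let mut encoder =
    device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default());

// 1. Allocate a slice aimed at `vertex_buffer` and write the data into it.
belt.write_buffer(
    &mut encoder,
    &vertex_buffer,
    0,
    wgpu::BufferSize::new(data.len() as u64).unwrap(),
    &device,
)
.copy_from_slice(&data);

// 2. Unmap all slices handed out since the last `finish()`.
belt.finish();

// 3. Submit every encoder that received copies in step 1.
queue.submit([encoder.finish()]);

// 4. Once the submission is scheduled, reclaim the staging buffers for reuse.
belt.recall();
```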
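And a sketch of what the new `allocate()` method enables: writing texel data to a texture through the belt. The helper name `upload_rgba8_texture`, the 64x64 RGBA8 layout, and the choice of `COPY_BYTES_PER_ROW_ALIGNMENT` as the slice alignment are assumptions, not part of the patch; the destination texture is assumed to have `COPY_DST` usage, and the `TexelCopy*` struct names are the current wgpu ones.

```rust
/// Hypothetical helper: copy a tightly packed 64x64 RGBA8 image into `texture`.
fn upload_rgba8_texture(
    device: &wgpu::Device,
    belt: &mut wgpu::util::StagingBelt,
    encoder: &mut wgpu::CommandEncoder,
    texture: &wgpu::Texture,
    texels: &[u8], // assumed length: 64 * 64 * 4 bytes
) {
    let size = wgpu::BufferSize::new(texels.len() as u64).expect("texel data must be non-empty");
    // 256 (COPY_BYTES_PER_ROW_ALIGNMENT) is a conservative alignment that also
    // satisfies the offset rules for buffer-to-texture copies.
    let alignment = wgpu::BufferSize::new(wgpu::COPY_BYTES_PER_ROW_ALIGNMENT.into()).unwrap();

    // Get a mapped slice, the belt buffer it lives in, and its offset in that buffer.
    let (mut view, buffer, offset) = belt.allocate(size, alignment, device);
    view.copy_from_slice(texels);

    // Record the copy now; it must be submitted after `belt.finish()` and
    // before `belt.recall()`, exactly as for `write_buffer()`.
    encoder.copy_buffer_to_texture(
        wgpu::TexelCopyBufferInfo {
            buffer,
            layout: wgpu::TexelCopyBufferLayout {
                offset,
                bytes_per_row: Some(64 * 4),
                rows_per_image: None,
            },
        },
        texture.as_image_copy(),
        wgpu::Extent3d {
            width: 64,
            height: 64,
            depth_or_array_layers: 1,
        },
    );
}
```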
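Finally, a small standalone illustration (not wgpu code) of the bump-allocation arithmetic that `Chunk::can_allocate()` and `Chunk::allocate()` perform: round the running offset up to the requested power-of-two alignment, then advance it past the allocation. The `align_to` shown here is a local stand-in for `util::align_to`, and the sizes are made up.

```rust
/// Stand-in for `wgpu::util::align_to`, valid for power-of-two alignments.
fn align_to(value: u64, alignment: u64) -> u64 {
    debug_assert!(alignment.is_power_of_two());
    (value + alignment - 1) & !(alignment - 1)
}

fn main() {
    let buffer_size = 1024u64; // stands in for Chunk::buffer.size()
    let mut offset = 0u64; // stands in for Chunk::offset, the bump pointer

    // First allocation: 3 bytes at alignment 8 starts at 0; the offset is left at 3.
    let start = align_to(offset, 8);
    assert!(start + 3 <= buffer_size); // what can_allocate checks
    offset = start + 3; // what allocate records
    assert_eq!((start, offset), (0, 3));

    // Second allocation: 16 bytes at alignment 256 rounds the start up from 3 to 256,
    // so the slice lands at a multiple of the alignment, as the allocate() docs state.
    let start = align_to(offset, 256);
    assert!(start + 16 <= buffer_size);
    offset = start + 16;
    assert_eq!((start, offset), (256, 272));
}
```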