From 1fba25d783245320dbb3c09eae1263c184031d78 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Fri, 31 Mar 2023 16:53:13 +0200 Subject: [PATCH 1/2] Replace TextureRowDataInfo with the more versatile Texture2DBufferInfo --- .../src/allocator/cpu_write_gpu_read_belt.rs | 10 +- .../src/allocator/gpu_readback_belt.rs | 10 +- .../src/draw_phases/picking_layer.rs | 44 ++++----- .../re_renderer/src/draw_phases/screenshot.rs | 13 ++- .../re_renderer/src/renderer/depth_cloud.rs | 21 ++--- crates/re_renderer/src/wgpu_resources/mod.rs | 91 +++++++++++++++---- 6 files changed, 117 insertions(+), 72 deletions(-) diff --git a/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs b/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs index db9f14b0a8fa..a3900e18a143 100644 --- a/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs +++ b/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs @@ -1,6 +1,6 @@ use std::{num::NonZeroU32, sync::mpsc}; -use crate::wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, TextureRowDataInfo}; +use crate::wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, Texture2DBufferInfo}; /// A sub-allocated staging buffer that can be written to. /// @@ -100,15 +100,13 @@ where destination: wgpu::ImageCopyTexture<'_>, copy_extent: glam::UVec2, ) { - let bytes_per_row = TextureRowDataInfo::new(destination.texture.format(), copy_extent.x) - .bytes_per_row_padded; + let buffer_info = Texture2DBufferInfo::new(destination.texture.format(), copy_extent); // Validate that we stay within the written part of the slice (wgpu can't fully know our intention here, so we have to check). // We go one step further and require the size to be exactly equal - it's too unlikely that you wrote more than is needed! // (and if you did you probably have regrets anyways!) - let required_buffer_size = bytes_per_row * copy_extent.y; debug_assert_eq!( - required_buffer_size as usize, + buffer_info.buffer_size_padded as usize, self.num_written() * std::mem::size_of::() ); @@ -117,7 +115,7 @@ where buffer: &self.chunk_buffer, layout: wgpu::ImageDataLayout { offset: self.byte_offset_in_chunk_buffer, - bytes_per_row: NonZeroU32::new(bytes_per_row), + bytes_per_row: NonZeroU32::new(buffer_info.bytes_per_row_padded), rows_per_image: None, }, }, diff --git a/crates/re_renderer/src/allocator/gpu_readback_belt.rs b/crates/re_renderer/src/allocator/gpu_readback_belt.rs index 09fcdd981054..6facafc3e761 100644 --- a/crates/re_renderer/src/allocator/gpu_readback_belt.rs +++ b/crates/re_renderer/src/allocator/gpu_readback_belt.rs @@ -1,6 +1,6 @@ use std::{num::NonZeroU32, ops::Range, sync::mpsc}; -use crate::wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, TextureRowDataInfo}; +use crate::wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, Texture2DBufferInfo}; /// Identifier used to identify a buffer upon retrieval of the data. /// @@ -61,13 +61,11 @@ impl GpuReadbackBuffer { source.texture.format().describe().block_size as u64, ); - let bytes_per_row = TextureRowDataInfo::new(source.texture.format(), copy_extents.x) - .bytes_per_row_padded; - let num_bytes = bytes_per_row * copy_extents.y; + let buffer_info = Texture2DBufferInfo::new(source.texture.format(), *copy_extents); // Validate that stay within the slice (wgpu can't fully know our intention here, so we have to check). debug_assert!( - (num_bytes as u64) <= self.range_in_chunk.end - start_offset, + buffer_info.buffer_size_padded <= self.range_in_chunk.end - start_offset, "Texture data is too large to fit into the readback buffer!" ); @@ -77,7 +75,7 @@ impl GpuReadbackBuffer { buffer: &self.chunk_buffer, layout: wgpu::ImageDataLayout { offset: start_offset, - bytes_per_row: NonZeroU32::new(bytes_per_row), + bytes_per_row: NonZeroU32::new(buffer_info.bytes_per_row_padded), rows_per_image: None, }, }, diff --git a/crates/re_renderer/src/draw_phases/picking_layer.rs b/crates/re_renderer/src/draw_phases/picking_layer.rs index 8a52147077ba..5270f4bf6093 100644 --- a/crates/re_renderer/src/draw_phases/picking_layer.rs +++ b/crates/re_renderer/src/draw_phases/picking_layer.rs @@ -13,7 +13,7 @@ use crate::{ allocator::create_and_fill_uniform_buffer, global_bindings::FrameUniformBuffer, view_builder::ViewBuilder, - wgpu_resources::{GpuBindGroup, GpuTexture, TextureDesc, TextureRowDataInfo}, + wgpu_resources::{GpuBindGroup, GpuTexture, Texture2DBufferInfo, TextureDesc}, DebugLabel, GpuReadbackBuffer, GpuReadbackIdentifier, IntRect, RenderContext, }; @@ -122,12 +122,23 @@ impl PickingLayerProcessor { readback_identifier: GpuReadbackIdentifier, readback_user_data: T, ) -> Self { - let row_info = TextureRowDataInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.width()); - let buffer_size = row_info.bytes_per_row_padded * picking_rect.height(); + let row_info_id = Texture2DBufferInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.extent); + //let row_info_depth = + //Texture2DBufferInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.extent); + + // Offset of the depth buffer in the readback buffer needs to be aligned to size of a depth pixel. + // This is "trivially true" if the size of the depth format is a multiple of the size of the id format. + debug_assert!( + Self::PICKING_LAYER_FORMAT.describe().block_size + % Self::PICKING_LAYER_DEPTH_FORMAT.describe().block_size + == 0 + ); + let buffer_size = row_info_id.buffer_size_padded; // + row_info_depth.buffer_size_padded; + let readback_buffer = ctx.gpu_readback_belt.lock().allocate( &ctx.device, &ctx.gpu_resources.buffers, - buffer_size as u64, + buffer_size, readback_identifier, Box::new(ReadbackBeltMetadata { picking_rect, @@ -279,31 +290,12 @@ impl PickingLayerProcessor { ctx.gpu_readback_belt .lock() .readback_data::>(identifier, |data, metadata| { - // Due to https://github.com/gfx-rs/wgpu/issues/3508 the data might be completely unaligned, - // so much, that we can't interpret it just as `PickingLayerId`. - // Therefore, we have to do a copy of the data regardless. - let row_info = TextureRowDataInfo::new( + let buffer_info_id = Texture2DBufferInfo::new( Self::PICKING_LAYER_FORMAT, - metadata.picking_rect.extent.x, + metadata.picking_rect.extent, ); - // Copies need to use [u8] because of aforementioned alignment issues. - let mut picking_data = vec![ - PickingLayerId::default(); - (metadata.picking_rect.extent.x * metadata.picking_rect.extent.y) - as usize - ]; - let picking_data_as_u8 = bytemuck::cast_slice_mut(&mut picking_data); - for row in 0..metadata.picking_rect.extent.y { - let offset_padded = (row_info.bytes_per_row_padded * row) as usize; - let offset_unpadded = (row_info.bytes_per_row_unpadded * row) as usize; - picking_data_as_u8[offset_unpadded - ..(offset_unpadded + row_info.bytes_per_row_unpadded as usize)] - .copy_from_slice( - &data[offset_padded - ..(offset_padded + row_info.bytes_per_row_unpadded as usize)], - ); - } + let picking_data = buffer_info_id.remove_padding_and_convert(data); result = Some(PickingResult { picking_data, diff --git a/crates/re_renderer/src/draw_phases/screenshot.rs b/crates/re_renderer/src/draw_phases/screenshot.rs index 875284986c90..68c05b3b545c 100644 --- a/crates/re_renderer/src/draw_phases/screenshot.rs +++ b/crates/re_renderer/src/draw_phases/screenshot.rs @@ -11,7 +11,7 @@ //! Or alternatively try to render the images in several tiles 🤔. In any case this would greatly improve quality! use crate::{ - wgpu_resources::{GpuTexture, TextureDesc, TextureRowDataInfo}, + wgpu_resources::{GpuTexture, Texture2DBufferInfo, TextureDesc}, DebugLabel, GpuReadbackBuffer, GpuReadbackIdentifier, RenderContext, }; @@ -37,12 +37,11 @@ impl ScreenshotProcessor { readback_identifier: GpuReadbackIdentifier, readback_user_data: T, ) -> Self { - let row_info = TextureRowDataInfo::new(Self::SCREENSHOT_COLOR_FORMAT, resolution.x); - let buffer_size = row_info.bytes_per_row_padded * resolution.y; + let buffer_info = Texture2DBufferInfo::new(Self::SCREENSHOT_COLOR_FORMAT, resolution); let screenshot_readback_buffer = ctx.gpu_readback_belt.lock().allocate( &ctx.device, &ctx.gpu_resources.buffers, - buffer_size as u64, + buffer_info.buffer_size_padded, readback_identifier, Box::new(ReadbackBeltMetadata { extent: resolution, @@ -130,9 +129,9 @@ impl ScreenshotProcessor { .lock() .readback_data::>(identifier, |data: &[u8], metadata| { screenshot_was_available = Some(()); - let texture_row_info = - TextureRowDataInfo::new(Self::SCREENSHOT_COLOR_FORMAT, metadata.extent.x); - let texture_data = texture_row_info.remove_padding(data); + let buffer_info = + Texture2DBufferInfo::new(Self::SCREENSHOT_COLOR_FORMAT, metadata.extent); + let texture_data = buffer_info.remove_padding(data); on_screenshot(&texture_data, metadata.extent, metadata.user_data); }); screenshot_was_available diff --git a/crates/re_renderer/src/renderer/depth_cloud.rs b/crates/re_renderer/src/renderer/depth_cloud.rs index 358132110442..c69def6eca10 100644 --- a/crates/re_renderer/src/renderer/depth_cloud.rs +++ b/crates/re_renderer/src/renderer/depth_cloud.rs @@ -21,8 +21,8 @@ use crate::{ view_builder::ViewBuilder, wgpu_resources::{ BindGroupDesc, BindGroupEntry, BindGroupLayoutDesc, GpuBindGroup, GpuBindGroupLayoutHandle, - GpuRenderPipelineHandle, GpuTexture, PipelineLayoutDesc, RenderPipelineDesc, TextureDesc, - TextureRowDataInfo, + GpuRenderPipelineHandle, GpuTexture, PipelineLayoutDesc, RenderPipelineDesc, + Texture2DBufferInfo, TextureDesc, }, ColorMap, OutlineMaskPreference, PickingLayerProcessor, }; @@ -336,34 +336,33 @@ fn create_and_upload_texture( .textures .alloc(&ctx.device, &depth_texture_desc); - let TextureRowDataInfo { - bytes_per_row_unpadded: bytes_per_row_unaligned, - bytes_per_row_padded, - } = TextureRowDataInfo::new(depth_texture_desc.format, depth_texture_desc.size.width); - // Not supporting compressed formats here. debug_assert!(depth_texture_desc.format.describe().block_dimensions == (1, 1)); + let buffer_info = + Texture2DBufferInfo::new(depth_texture_desc.format, depth_cloud.depth_dimensions); + // TODO(andreas): CpuGpuWriteBelt should make it easier to do this. - let bytes_padding_per_row = (bytes_per_row_padded - bytes_per_row_unaligned) as usize; + let bytes_padding_per_row = + (buffer_info.bytes_per_row_padded - buffer_info.bytes_per_row_unpadded) as usize; // Sanity check the padding size. If this happens something is seriously wrong, as it would imply // that we can't express the required alignment with the block size. debug_assert!( bytes_padding_per_row % std::mem::size_of::() == 0, "Padding is not a multiple of pixel size. Can't correctly pad the texture data" ); - let num_pixel_padding_per_row = bytes_padding_per_row / std::mem::size_of::(); let mut depth_texture_staging = ctx.cpu_write_gpu_read_belt.lock().allocate::( &ctx.device, &ctx.gpu_resources.buffers, - data.len() + num_pixel_padding_per_row * depth_texture_desc.size.height as usize, + buffer_info.buffer_size_padded as usize / std::mem::size_of::(), ); // Fill with a single copy if possible, otherwise do multiple, filling in padding. - if num_pixel_padding_per_row == 0 { + if bytes_padding_per_row == 0 { depth_texture_staging.extend_from_slice(data); } else { + let num_pixel_padding_per_row = bytes_padding_per_row / std::mem::size_of::(); for row in data.chunks(depth_texture_desc.size.width as usize) { depth_texture_staging.extend_from_slice(row); depth_texture_staging diff --git a/crates/re_renderer/src/wgpu_resources/mod.rs b/crates/re_renderer/src/wgpu_resources/mod.rs index ef272248e3dd..a61f4eef26bc 100644 --- a/crates/re_renderer/src/wgpu_resources/mod.rs +++ b/crates/re_renderer/src/wgpu_resources/mod.rs @@ -117,9 +117,9 @@ impl WgpuResourcePools { } } -/// Utility for dealing with rows of raw texture data. -#[derive(Clone, Copy)] -pub struct TextureRowDataInfo { +/// Utility for dealing with buffers containing raw 2D texture data. +#[derive(Clone)] +pub struct Texture2DBufferInfo { /// How many bytes per row contain actual data. pub bytes_per_row_unpadded: u32, @@ -127,34 +127,57 @@ pub struct TextureRowDataInfo { /// /// Padding bytes are always at the end of a row. pub bytes_per_row_padded: u32, + + /// Size required for an unpadded buffer. + pub buffer_size_unpadded: wgpu::BufferAddress, + + /// Size required for a padded buffer as it is read/written from/to the GPU. + pub buffer_size_padded: wgpu::BufferAddress, } -impl TextureRowDataInfo { - pub fn new(format: wgpu::TextureFormat, width: u32) -> Self { +impl Texture2DBufferInfo { + #[inline] + pub fn new(format: wgpu::TextureFormat, extent: glam::UVec2) -> Self { let format_info = format.describe(); - let width_blocks = width / format_info.block_dimensions.0 as u32; - let bytes_per_row_unaligned = width_blocks * format_info.block_size as u32; + + let width_blocks = extent.x / format_info.block_dimensions.0 as u32; + let height_blocks = extent.y / format_info.block_dimensions.1 as u32; + + let bytes_per_row_unpadded = width_blocks * format_info.block_size as u32; + let bytes_per_row_padded = + wgpu::util::align_to(bytes_per_row_unpadded, wgpu::COPY_BYTES_PER_ROW_ALIGNMENT); Self { - bytes_per_row_unpadded: bytes_per_row_unaligned, - bytes_per_row_padded: wgpu::util::align_to( - bytes_per_row_unaligned, - wgpu::COPY_BYTES_PER_ROW_ALIGNMENT, - ), + bytes_per_row_unpadded, + bytes_per_row_padded, + buffer_size_unpadded: (bytes_per_row_unpadded * height_blocks) as wgpu::BufferAddress, + buffer_size_padded: (bytes_per_row_padded * height_blocks) as wgpu::BufferAddress, } } + #[inline] + pub fn num_rows(&self) -> u32 { + self.buffer_size_padded as u32 / self.bytes_per_row_padded + } + /// Removes the padding from a buffer containing gpu texture data. + /// + /// The buffer have the expected size for a padded buffer to hold the texture data. + /// + /// Note that if you're passing in gpu data, there no alignment guarantees on the returned slice, + /// do NOT convert it using [`bytemuck`]. Use [`Texture2DBufferInfo::remove_padding_and_convert`] instead. pub fn remove_padding<'a>(&self, buffer: &'a [u8]) -> Cow<'a, [u8]> { + crate::profile_function!(); + + assert!(buffer.len() as wgpu::BufferAddress == self.buffer_size_padded); + if self.bytes_per_row_padded == self.bytes_per_row_unpadded { return Cow::Borrowed(buffer); } - let height = (buffer.len() as u32) / self.bytes_per_row_padded; - let mut unpadded_buffer = - Vec::with_capacity((self.bytes_per_row_unpadded * height) as usize); + let mut unpadded_buffer = Vec::with_capacity(self.buffer_size_unpadded as _); - for row in 0..height { + for row in 0..self.num_rows() { let offset = (self.bytes_per_row_padded * row) as usize; unpadded_buffer.extend_from_slice( &buffer[offset..(offset + self.bytes_per_row_unpadded as usize)], @@ -163,4 +186,40 @@ impl TextureRowDataInfo { unpadded_buffer.into() } + + /// Removes the padding from a buffer containing gpu texture data and remove convert to a given type. + /// + /// The buffer have the expected size for a padded buffer to hold the texture data. + /// + /// The unpadded row size is expected to be a multiple of the size of the target type. + /// (Which means that, while uncommon, it technically doesn't need to be as big as a block in the pixel - this can be useful for e.g. packing wide bitfields) + pub fn remove_padding_and_convert(&self, buffer: &[u8]) -> Vec { + crate::profile_function!(); + + assert!(buffer.len() as wgpu::BufferAddress == self.buffer_size_padded); + assert!(self.bytes_per_row_unpadded % std::mem::size_of::() as u32 == 0); + + // Due to https://github.com/gfx-rs/wgpu/issues/3508 the data might be completely unaligned, + // so much, that we can't even interpret it as e.g. a u32 slice. + // Therefore, we have to do a copy of the data regardless of whether it's padded or not. + + let mut unpadded_buffer: Vec = vec![ + T::zeroed(); + (self.num_rows() * self.bytes_per_row_unpadded / std::mem::size_of::() as u32) + as usize + ]; // Consider using unsafe set_len() instead of vec![] to avoid zeroing the memory. + let unpaadded_buffer_u8 = bytemuck::cast_slice_mut(&mut unpadded_buffer); + + for row in 0..self.num_rows() { + let offset_padded = (self.bytes_per_row_padded * row) as usize; + let offset_unpadded = (self.bytes_per_row_unpadded * row) as usize; + unpaadded_buffer_u8 + [offset_unpadded..(offset_unpadded + self.bytes_per_row_unpadded as usize)] + .copy_from_slice( + &buffer[offset_padded..(offset_padded + self.bytes_per_row_unpadded as usize)], + ); + } + + unpadded_buffer + } } From ec8adf84b4e1b0f70f7c54b53144e8a011e8f175 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Mon, 3 Apr 2023 10:32:04 +0200 Subject: [PATCH 2/2] comment & naming fixes --- crates/re_renderer/src/wgpu_resources/mod.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/crates/re_renderer/src/wgpu_resources/mod.rs b/crates/re_renderer/src/wgpu_resources/mod.rs index a61f4eef26bc..403e46350b47 100644 --- a/crates/re_renderer/src/wgpu_resources/mod.rs +++ b/crates/re_renderer/src/wgpu_resources/mod.rs @@ -162,7 +162,7 @@ impl Texture2DBufferInfo { /// Removes the padding from a buffer containing gpu texture data. /// - /// The buffer have the expected size for a padded buffer to hold the texture data. + /// The passed in buffer is to be expected to be exactly of size [`Texture2DBufferInfo::buffer_size_padded`]. /// /// Note that if you're passing in gpu data, there no alignment guarantees on the returned slice, /// do NOT convert it using [`bytemuck`]. Use [`Texture2DBufferInfo::remove_padding_and_convert`] instead. @@ -189,7 +189,7 @@ impl Texture2DBufferInfo { /// Removes the padding from a buffer containing gpu texture data and remove convert to a given type. /// - /// The buffer have the expected size for a padded buffer to hold the texture data. + /// The passed in buffer is to be expected to be exactly of size [`Texture2DBufferInfo::buffer_size_padded`]. /// /// The unpadded row size is expected to be a multiple of the size of the target type. /// (Which means that, while uncommon, it technically doesn't need to be as big as a block in the pixel - this can be useful for e.g. packing wide bitfields) @@ -207,13 +207,15 @@ impl Texture2DBufferInfo { T::zeroed(); (self.num_rows() * self.bytes_per_row_unpadded / std::mem::size_of::() as u32) as usize - ]; // Consider using unsafe set_len() instead of vec![] to avoid zeroing the memory. - let unpaadded_buffer_u8 = bytemuck::cast_slice_mut(&mut unpadded_buffer); + ]; // TODO(andreas): Consider using unsafe set_len() instead of vec![] to avoid zeroing the memory. + + // The copy has to happen on a u8 slice, because any other type would assume some alignment that we can't guarantee because of the above. + let unpadded_buffer_u8_view = bytemuck::cast_slice_mut(&mut unpadded_buffer); for row in 0..self.num_rows() { let offset_padded = (self.bytes_per_row_padded * row) as usize; let offset_unpadded = (self.bytes_per_row_unpadded * row) as usize; - unpaadded_buffer_u8 + unpadded_buffer_u8_view [offset_unpadded..(offset_unpadded + self.bytes_per_row_unpadded as usize)] .copy_from_slice( &buffer[offset_padded..(offset_padded + self.bytes_per_row_unpadded as usize)],