Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make CpuWriteGpuReadBelt texture copies easier/less error prone #1689

Merged
merged 3 commits into from
Mar 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 29 additions & 9 deletions crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::{num::NonZeroU32, ops::DerefMut, sync::mpsc};

use crate::wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool};
use crate::wgpu_resources::{texture_row_data_info, BufferDesc, GpuBuffer, GpuBufferPool};

/// A sub-allocated staging buffer that can be written to.
///
Expand Down Expand Up @@ -85,26 +85,46 @@ where
self.unwritten_element_range.start
}

/// Copies the entire buffer to a texture and drops it.
pub fn copy_to_texture(
/// Copies all so far written data to a rectangle on a single 2d texture layer.
///
/// Assumes that the buffer consists of as-tightly-packed-as-possible rows of data.
/// (taking into account required padding as specified by [`wgpu::COPY_BYTES_PER_ROW_ALIGNMENT`])
///
/// Implementation note:
/// Does 2D-only entirely for convenience as it greatly simplifies the input parameters.
pub fn copy_to_texture2d(
self,
encoder: &mut wgpu::CommandEncoder,
destination: wgpu::ImageCopyTexture<'_>,
bytes_per_row: Option<NonZeroU32>,
rows_per_image: Option<NonZeroU32>,
copy_size: wgpu::Extent3d,
copy_extent: glam::UVec2,
) {
let bytes_per_row =
texture_row_data_info(destination.texture.format(), copy_extent.x).bytes_per_row_padded;

// Validate that we stay within the written part of the slice (wgpu can't fully know our intention here, so we have to check).
// We go one step further and require the size to be exactly equal - it's too unlikely that you wrote more than is needed!
// (and if you did you probably have regrets anyways!)
let required_buffer_size = bytes_per_row * copy_extent.y;
debug_assert_eq!(
required_buffer_size as usize,
self.num_written() * std::mem::size_of::<T>()
);

encoder.copy_buffer_to_texture(
wgpu::ImageCopyBuffer {
buffer: &self.chunk_buffer,
layout: wgpu::ImageDataLayout {
offset: self.byte_offset_in_chunk_buffer,
bytes_per_row,
rows_per_image,
bytes_per_row: NonZeroU32::new(bytes_per_row),
rows_per_image: None,
},
},
destination,
copy_size,
wgpu::Extent3d {
width: copy_extent.x,
height: copy_extent.y,
depth_or_array_layers: 1,
},
);
}

Expand Down
7 changes: 2 additions & 5 deletions crates/re_renderer/src/renderer/depth_cloud.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
//! behaves pretty much exactly like our point cloud renderer (see [`point_cloud.rs`]).

use smallvec::smallvec;
use std::num::NonZeroU32;

use crate::{
allocator::create_and_fill_uniform_buffer_batch,
Expand Down Expand Up @@ -330,17 +329,15 @@ fn create_and_upload_texture<T: bytemuck::Pod>(
}
}

depth_texture_staging.copy_to_texture(
depth_texture_staging.copy_to_texture2d(
ctx.active_frame.before_view_builder_encoder.lock().get(),
wgpu::ImageCopyTexture {
texture: &depth_texture.inner.texture,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
Some(NonZeroU32::new(bytes_per_row_padded).expect("invalid bytes per row")),
None,
depth_texture_size,
depth_cloud.depth_dimensions,
);

depth_texture
Expand Down
71 changes: 29 additions & 42 deletions crates/re_renderer/src/renderer/point_cloud.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,7 @@
//! that srgb->linear conversion happens on texture load.
//!

use std::{
num::{NonZeroU32, NonZeroU64},
ops::Range,
};
use std::{num::NonZeroU64, ops::Range};

use crate::{
allocator::create_and_fill_uniform_buffer_batch, renderer::OutlineMaskProcessor, DebugLabel,
Expand All @@ -25,7 +22,6 @@ use crate::{
use bitflags::bitflags;
use bytemuck::Zeroable;
use enumset::{enum_set, EnumSet};
use itertools::Itertools;
use smallvec::smallvec;

use crate::{
Expand Down Expand Up @@ -175,7 +171,7 @@ impl PointCloudDrawData {
/// If no batches are passed, all points are assumed to be in a single batch with identity transform.
pub fn new<T>(
ctx: &mut RenderContext,
builder: PointCloudBuilder<T>,
mut builder: PointCloudBuilder<T>,
) -> Result<Self, PointCloudDrawDataError> {
crate::profile_function!();

Expand Down Expand Up @@ -266,63 +262,54 @@ impl PointCloudDrawData {
},
);

// TODO(andreas): We want a staging-belt(-like) mechanism to upload data instead of the queue.
// These staging buffers would be provided by the belt.
// To make the data upload simpler (and have it be done in one go), we always update full rows of each of our textures
// To make the data upload simpler (and have it be done in one copy operation), we always update full rows of each of our textures
let num_points_written =
wgpu::util::align_to(vertices.len() as u32, DATA_TEXTURE_SIZE) as usize;
let num_points_zeroed = num_points_written - vertices.len();
let position_and_size_staging = {
crate::profile_scope!("collect_pos_size");
vertices
.iter()
.map(|point| gpu_data::PositionData {
pos: point.position,
radius: point.radius,
})
.chain(std::iter::repeat(gpu_data::PositionData::zeroed()).take(num_points_zeroed))
.collect_vec()
};

// Upload data from staging buffers to gpu.
let size = wgpu::Extent3d {
width: DATA_TEXTURE_SIZE,
height: num_points_written as u32 / DATA_TEXTURE_SIZE,
depth_or_array_layers: 1,
};
let num_elements_padding = num_points_written - vertices.len();
let texture_copy_extent = glam::uvec2(
DATA_TEXTURE_SIZE,
num_points_written as u32 / DATA_TEXTURE_SIZE,
);

{
crate::profile_scope!("write_pos_size_texture");
ctx.queue.write_texture(

let mut staging_buffer = ctx.cpu_write_gpu_read_belt.lock().allocate(
&ctx.device,
&ctx.gpu_resources.buffers,
num_points_written,
);
staging_buffer.extend(vertices.iter().map(|point| gpu_data::PositionData {
pos: point.position,
radius: point.radius,
}));
staging_buffer.extend(
std::iter::repeat(gpu_data::PositionData::zeroed()).take(num_elements_padding),
);
staging_buffer.copy_to_texture2d(
ctx.active_frame.before_view_builder_encoder.lock().get(),
wgpu::ImageCopyTexture {
texture: &position_data_texture.texture,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
bytemuck::cast_slice(&position_and_size_staging),
wgpu::ImageDataLayout {
offset: 0,
bytes_per_row: NonZeroU32::new(
DATA_TEXTURE_SIZE * std::mem::size_of::<gpu_data::PositionData>() as u32,
),
rows_per_image: None,
},
size,
texture_copy_extent,
);
}

builder.color_buffer.copy_to_texture(
builder
.color_buffer
.extend(std::iter::repeat(ecolor::Color32::TRANSPARENT).take(num_elements_padding));
builder.color_buffer.copy_to_texture2d(
ctx.active_frame.before_view_builder_encoder.lock().get(),
wgpu::ImageCopyTexture {
texture: &color_texture.texture,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
NonZeroU32::new(DATA_TEXTURE_SIZE * std::mem::size_of::<[u8; 4]>() as u32),
None,
size,
texture_copy_extent,
);

let draw_data_uniform_buffer_bindings = create_and_fill_uniform_buffer_batch(
Expand Down