Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More robust handling of maximum texture size for non-color data, slight perf improvements for large point clouds #5229

Merged
merged 7 commits into from
Feb 20, 2024
6 changes: 6 additions & 0 deletions crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,12 @@ where
Ok(())
}

/// True if no elements have been pushed into the buffer so far.
#[inline]
pub fn is_empty(&self) -> bool {
self.unwritten_element_range.start == 0
}

/// The number of elements pushed into the buffer so far.
#[inline]
pub fn num_written(&self) -> usize {
Expand Down
274 changes: 206 additions & 68 deletions crates/re_renderer/src/allocator/data_texture_source.rs

Large diffs are not rendered by default.

5 changes: 1 addition & 4 deletions crates/re_renderer/src/allocator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,7 @@ mod uniform_buffer_fill;
pub use cpu_write_gpu_read_belt::{
CpuWriteGpuReadBelt, CpuWriteGpuReadBuffer, CpuWriteGpuReadError,
};
pub use data_texture_source::{
data_texture_desc, data_texture_size, data_texture_source_buffer_element_count,
DataTextureSource,
};
pub use data_texture_source::{data_texture_desc, DataTextureSource};
pub use gpu_readback_belt::{
GpuReadbackBelt, GpuReadbackBuffer, GpuReadbackError, GpuReadbackIdentifier,
};
Expand Down
12 changes: 8 additions & 4 deletions crates/re_renderer/src/line_drawable_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::{
/// TODO(andreas): We could make significant optimizations here by making this builder capable
/// of writing to a GPU readable memory location for all its data.
pub struct LineDrawableBuilder<'ctx> {
ctx: &'ctx crate::context::RenderContext,
ctx: &'ctx RenderContext,

pub(crate) vertices: Vec<LineVertex>,
pub(crate) batches: Vec<LineBatchInfo>,
Expand All @@ -41,15 +41,19 @@ impl<'ctx> LineDrawableBuilder<'ctx> {
}
}

pub fn reserve_strips(&mut self, num_strips: usize) -> Result<(), CpuWriteGpuReadError> {
/// Returns number of strips that can be added without reallocation.
/// This may be smaller than the requested number if the maximum number of strips is reached.
pub fn reserve_strips(&mut self, num_strips: usize) -> Result<usize, CpuWriteGpuReadError> {
self.strips.reserve(num_strips);
self.picking_instance_ids_buffer.reserve(num_strips)
}

/// Returns number of vertices that can be added without reallocation.
/// This may be smaller than the requested number if the maximum number of vertices is reached.
#[allow(clippy::unnecessary_wraps)] // TODO(andreas): will be needed once vertices writes directly to GPU readable memory.
pub fn reserve_vertices(&mut self, num_vertices: usize) -> Result<(), CpuWriteGpuReadError> {
pub fn reserve_vertices(&mut self, num_vertices: usize) -> Result<usize, CpuWriteGpuReadError> {
self.vertices.reserve(num_vertices);
Ok(())
Ok(self.vertices.capacity() - self.vertices.len())
}

/// Boosts the size of the points by the given amount of ui-points for the purpose of drawing outlines.
Expand Down
180 changes: 78 additions & 102 deletions crates/re_renderer/src/point_cloud_builder.rs
Original file line number Diff line number Diff line change
@@ -1,87 +1,71 @@
use itertools::izip;
use itertools::{izip, Itertools};

use re_log::ResultExt;

use crate::{
allocator::{data_texture_source_buffer_element_count, CpuWriteGpuReadBuffer},
allocator::DataTextureSource,
draw_phases::PickingLayerObjectId,
renderer::{
PointCloudBatchFlags, PointCloudBatchInfo, PointCloudDrawData, PointCloudDrawDataError,
PositionRadius,
},
Color32, DebugLabel, DepthOffset, OutlineMaskPreference, PickingLayerInstanceId, RenderContext,
Size,
Color32, CpuWriteGpuReadError, DebugLabel, DepthOffset, OutlineMaskPreference,
PickingLayerInstanceId, RenderContext, Size,
};

/// Builder for point clouds, making it easy to create [`crate::renderer::PointCloudDrawData`].
pub struct PointCloudBuilder {
pub struct PointCloudBuilder<'ctx> {
pub(crate) ctx: &'ctx RenderContext,

// Size of `point`/color` must be equal.
pub vertices: Vec<PositionRadius>,
pub(crate) position_radius_buffer: DataTextureSource<'ctx, PositionRadius>,

pub(crate) color_buffer: CpuWriteGpuReadBuffer<Color32>,
pub(crate) picking_instance_ids_buffer: CpuWriteGpuReadBuffer<PickingLayerInstanceId>,
pub(crate) color_buffer: DataTextureSource<'ctx, Color32>,
pub(crate) picking_instance_ids_buffer: DataTextureSource<'ctx, PickingLayerInstanceId>,

pub(crate) batches: Vec<PointCloudBatchInfo>,

pub(crate) radius_boost_in_ui_points_for_outlines: f32,

max_num_points: usize,
}

impl PointCloudBuilder {
pub fn new(ctx: &RenderContext, max_num_points: usize) -> Self {
let max_texture_dimension_2d = ctx.device.limits().max_texture_dimension_2d;

let color_buffer = ctx
.cpu_write_gpu_read_belt
.lock()
.allocate::<Color32>(
&ctx.device,
&ctx.gpu_resources.buffers,
data_texture_source_buffer_element_count(
PointCloudDrawData::COLOR_TEXTURE_FORMAT,
max_num_points,
max_texture_dimension_2d,
),
)
.expect("Failed to allocate color buffer"); // TODO(#3408): Should never happen but should propagate error anyways

let picking_instance_ids_buffer = ctx
.cpu_write_gpu_read_belt
.lock()
.allocate::<PickingLayerInstanceId>(
&ctx.device,
&ctx.gpu_resources.buffers,
data_texture_source_buffer_element_count(
PointCloudDrawData::PICKING_INSTANCE_ID_TEXTURE_FORMAT,
max_num_points,
max_texture_dimension_2d,
),
)
.expect("Failed to allocate picking layer buffer"); // TODO(#3408): Should never happen but should propagate error anyways

impl<'ctx> PointCloudBuilder<'ctx> {
pub fn new(ctx: &'ctx RenderContext) -> Self {
Self {
vertices: Vec::with_capacity(max_num_points),
color_buffer,
picking_instance_ids_buffer,
ctx,
position_radius_buffer: DataTextureSource::new(ctx),
color_buffer: DataTextureSource::new(ctx),
picking_instance_ids_buffer: DataTextureSource::new(ctx),
batches: Vec::with_capacity(16),
radius_boost_in_ui_points_for_outlines: 0.0,
max_num_points,
}
}

/// Returns number of points that can be added without reallocation.
/// This may be smaller than the requested number if the maximum number of strips is reached.
pub fn reserve(
&mut self,
expected_number_of_additional_points: usize,
) -> Result<usize, CpuWriteGpuReadError> {
// We know that the maximum number is independent of datatype, so we can use the same value for all.
self.position_radius_buffer
.reserve(expected_number_of_additional_points)?;
self.color_buffer
.reserve(expected_number_of_additional_points)?;
self.picking_instance_ids_buffer
.reserve(expected_number_of_additional_points)
}

/// Boosts the size of the points by the given amount of ui-points for the purpose of drawing outlines.
pub fn radius_boost_in_ui_points_for_outlines(
mut self,
&mut self,
radius_boost_in_ui_points_for_outlines: f32,
) -> Self {
) {
self.radius_boost_in_ui_points_for_outlines = radius_boost_in_ui_points_for_outlines;
self
}

/// Start of a new batch.
#[inline]
pub fn batch(&mut self, label: impl Into<DebugLabel>) -> PointCloudBatchBuilder<'_> {
pub fn batch(&mut self, label: impl Into<DebugLabel>) -> PointCloudBatchBuilder<'_, 'ctx> {
self.batches.push(PointCloudBatchInfo {
label: label.into(),
world_from_obj: glam::Affine3A::IDENTITY,
Expand All @@ -96,36 +80,15 @@ impl PointCloudBuilder {
PointCloudBatchBuilder(self)
}

// Iterate over all batches, yielding the batch info and a point vertex iterator.
pub fn iter_vertices_by_batch(
&self,
) -> impl Iterator<Item = (&PointCloudBatchInfo, impl Iterator<Item = &PositionRadius>)> {
let mut vertex_offset = 0;
self.batches.iter().map(move |batch| {
let out = (
batch,
self.vertices
.iter()
.skip(vertex_offset)
.take(batch.point_count as usize),
);
vertex_offset += batch.point_count as usize;
out
})
}

/// Finalizes the builder and returns a point cloud draw data with all the points added so far.
pub fn into_draw_data(
self,
ctx: &crate::context::RenderContext,
) -> Result<PointCloudDrawData, PointCloudDrawDataError> {
PointCloudDrawData::new(ctx, self)
pub fn into_draw_data(self) -> Result<PointCloudDrawData, PointCloudDrawDataError> {
PointCloudDrawData::new(self)
}
}

pub struct PointCloudBatchBuilder<'a>(&'a mut PointCloudBuilder);
pub struct PointCloudBatchBuilder<'a, 'ctx>(&'a mut PointCloudBuilder<'ctx>);

impl<'a> Drop for PointCloudBatchBuilder<'a> {
impl<'a, 'ctx> Drop for PointCloudBatchBuilder<'a, 'ctx> {
fn drop(&mut self) {
// Remove batch again if it wasn't actually used.
if self.0.batches.last().unwrap().point_count == 0 {
Expand All @@ -134,7 +97,7 @@ impl<'a> Drop for PointCloudBatchBuilder<'a> {
}
}

impl<'a> PointCloudBatchBuilder<'a> {
impl<'a, 'ctx> PointCloudBatchBuilder<'a, 'ctx> {
#[inline]
fn batch_mut(&mut self) -> &mut PointCloudBatchInfo {
self.0
Expand Down Expand Up @@ -179,28 +142,38 @@ impl<'a> PointCloudBatchBuilder<'a> {
colors: &[Color32],
picking_ids: &[PickingLayerInstanceId],
) -> Self {
// TODO(jleibs): Figure out if we can plumb-through proper support for `Iterator::size_hints()`
// or potentially make `FixedSizedIterator` work correctly. This should be possible size the
// underlying arrow structures are of known-size, but carries some complexity with the amount of
// chaining, joining, filtering, etc. that happens along the way.
re_tracing::profile_function!();

let mut num_points = positions.len();

debug_assert_eq!(self.0.vertices.len(), self.0.color_buffer.num_written());
debug_assert_eq!(
self.0.vertices.len(),
self.0.picking_instance_ids_buffer.num_written()
self.0.position_radius_buffer.len(),
self.0.color_buffer.len()
);
debug_assert_eq!(
self.0.position_radius_buffer.len(),
self.0.picking_instance_ids_buffer.len()
);

// Do a reserve ahead of time, to check whether we're hitting the data texture limit.
// The limit is the same for all data textures, so we only need to check one.
let Some(num_available_points) = self
.0
.position_radius_buffer
.reserve(positions.len())
.ok_or_log_error()
else {
return self;
};

if num_points + self.0.vertices.len() > self.0.max_num_points {
let num_points = if positions.len() > num_available_points {
re_log::error_once!(
"Reserved space for {} points, but reached {}. Clamping to previously set maximum",
self.0.max_num_points,
num_points + self.0.vertices.len()
"Reached maximum number of points for point cloud of {}. Ignoring all excess points.",
self.0.position_radius_buffer.len() + num_available_points
);
num_points = self.0.max_num_points - self.0.vertices.len();
}
num_available_points
} else {
positions.len()
};

if num_points == 0 {
return self;
}
Expand All @@ -215,13 +188,20 @@ impl<'a> PointCloudBatchBuilder<'a> {

{
re_tracing::profile_scope!("positions & radii");
self.0.vertices.extend(
izip!(
positions.iter().copied(),
radii.iter().copied().chain(std::iter::repeat(Size::AUTO))
)
.map(|(pos, radius)| PositionRadius { pos, radius }),
);

// TODO(andreas): It would be nice to pass on the iterator as is so we don't have to do yet another
// copy of the data and instead write into the buffers directly - if done right this should be the fastest.
// But it's surprisingly tricky to do this effectively.
let vertices = izip!(
positions.iter().copied(),
radii.iter().copied().chain(std::iter::repeat(Size::AUTO))
)
.map(|(pos, radius)| PositionRadius { pos, radius })
.collect_vec();
self.0
.position_radius_buffer
.extend_from_slice(&vertices)
.ok_or_log_error();
}
{
re_tracing::profile_scope!("colors");
Expand All @@ -230,8 +210,6 @@ impl<'a> PointCloudBatchBuilder<'a> {
.color_buffer
.extend_from_slice(colors)
.ok_or_log_error();

// Fill up with defaults. Doing this in a separate step is faster than chaining the iterator.
self.0
.color_buffer
.add_n(Color32::WHITE, num_points.saturating_sub(colors.len()))
Expand All @@ -244,8 +222,6 @@ impl<'a> PointCloudBatchBuilder<'a> {
.picking_instance_ids_buffer
.extend_from_slice(picking_ids)
.ok_or_log_error();

// Fill up with defaults. Doing this in a separate step is faster than chaining the iterator.
self.0
.picking_instance_ids_buffer
.add_n(
Expand Down
Loading
Loading