Skip to content

Commit

Permalink
Optimized cpu time for 3D point clouds (once again!) (#5273)
Browse files Browse the repository at this point in the history
### What

Embarrassingly simple optimization that got missed when cachifying the
point cloud: Removed two needless `to_vec` copy calls which were done on
separate rayon jobs.

`pixi run rerun-release ../rainwater.rrd --threads=1`
**19.6ms -> 17.7ms** cpu time on top bar counter in rainwater scene
(4.6mio colored points with homogeneous radius).
Without --threads=1 perf was a bit too unstable to make good statements
(also perf trace can't be averaged nicely) and arguably much more
interesting anyways. There's not much multithreading going on in this
scene regardless.

Starts to get gpu bound on my mac which is ofc extremely view &
resolution dependent. Haven tried but I expect the gpu->cpu transfer
cost to still be much more significant on machines without unified
memory architecture (and obviously we want to fix that regardless).

Before:

![image](https://github.com/rerun-io/rerun/assets/1220815/ee6ef4c7-2745-48d4-b981-8396af9ff1f5)


After:

![image](https://github.com/rerun-io/rerun/assets/1220815/01d5f428-a176-4083-af90-7c3ca8414fd1)


As visible from the traces, we could certainly still do better fairly
easily by having a fast path on color and radius processing

### Checklist
* [x] I have read and agree to [Contributor
Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and
the [Code of
Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested the web demo (if applicable):
* Using newly built examples:
[app.rerun.io](https://app.rerun.io/pr/5273/index.html)
* Using examples from latest `main` build:
[app.rerun.io](https://app.rerun.io/pr/5273/index.html?manifest_url=https://app.rerun.io/version/main/examples_manifest.json)
* Using full set of examples from `nightly` build:
[app.rerun.io](https://app.rerun.io/pr/5273/index.html?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json)
* [x] The PR title and labels are set such as to maximize their
usefulness for the next release's CHANGELOG
* [x] If applicable, add a new check to the [release
checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)!

- [PR Build Summary](https://build.rerun.io/pr/5273)
- [Docs
preview](https://rerun.io/preview/187fbf20a2d443a0a572103a620d46aae32340bb/docs)
<!--DOCS-PREVIEW-->
- [Examples
preview](https://rerun.io/preview/187fbf20a2d443a0a572103a620d46aae32340bb/examples)
<!--EXAMPLES-PREVIEW-->
- [Recent benchmark results](https://build.rerun.io/graphs/crates.html)
- [Wasm size tracking](https://build.rerun.io/graphs/sizes.html)
  • Loading branch information
Wumpf authored Feb 24, 2024
1 parent 8a49ebb commit 289a812
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 81 deletions.
24 changes: 0 additions & 24 deletions crates/re_space_view_spatial/benches/bench_points.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,18 +133,6 @@ fn bench_points(c: &mut criterion::Criterion) {
});
}

{
let mut group = c.benchmark_group("Points3D");
group.throughput(criterion::Throughput::Elements(NUM_POINTS as _));
group.bench_function(bench_name(true, "load_positions"), |b| {
b.iter(|| {
let positions = LoadedPoints::load_positions(&data);
assert_eq!(positions.len(), NUM_POINTS);
positions
});
});
}

{
let points = LoadedPoints::load(&data, &ent_path, at, &annotations);

Expand Down Expand Up @@ -175,18 +163,6 @@ fn bench_points(c: &mut criterion::Criterion) {
});
});
}

{
let mut group = c.benchmark_group("Points3D");
group.throughput(criterion::Throughput::Elements(NUM_POINTS as _));
group.bench_function(bench_name(true, "load_picking_ids"), |b| {
b.iter(|| {
let picking_ids = LoadedPoints::load_picking_ids(&data);
assert_eq!(picking_ids.len(), NUM_POINTS);
picking_ids
});
});
}
},
)
.unwrap();
Expand Down
14 changes: 3 additions & 11 deletions crates/re_space_view_spatial/src/visualizers/points2d.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use re_entity_db::{EntityPath, InstancePathHash};
use re_renderer::{LineDrawableBuilder, PickingLayerInstanceId, PointCloudBuilder};
use re_renderer::{LineDrawableBuilder, PointCloudBuilder};
use re_types::{
archetypes::Points2D,
components::{ClassId, Color, InstanceKey, KeypointId, Position2D, Radius, Text},
Expand Down Expand Up @@ -95,7 +95,7 @@ impl Points2DVisualizer {
let positions = Self::load_positions(data);
let colors = Self::load_colors(data, ent_path, &annotation_infos);
let radii = Self::load_radii(data, ent_path);
let picking_instance_ids = Self::load_picking_ids(data);
let picking_instance_ids = bytemuck::cast_slice(data.instance_keys);

{
re_tracing::profile_scope!("to_gpu");
Expand All @@ -112,7 +112,7 @@ impl Points2DVisualizer {
.picking_object_id(re_renderer::PickingLayerObjectId(ent_path.hash64()));

let mut point_range_builder =
point_batch.add_points_2d(&positions, &radii, &colors, &picking_instance_ids);
point_batch.add_points_2d(&positions, &radii, &colors, picking_instance_ids);

// Determine if there's any sub-ranges that need extra highlighting.
{
Expand Down Expand Up @@ -200,14 +200,6 @@ impl Points2DVisualizer {
) -> Vec<re_renderer::Color32> {
crate::visualizers::process_color_slice(colors, ent_path, annotation_infos)
}

#[inline]
pub fn load_picking_ids(
&Points2DComponentData { instance_keys, .. }: &Points2DComponentData<'_>,
) -> Vec<PickingLayerInstanceId> {
re_tracing::profile_function!();
bytemuck::cast_slice(instance_keys).to_vec()
}
}

// ---
Expand Down
57 changes: 11 additions & 46 deletions crates/re_space_view_spatial/src/visualizers/points3d.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ impl Points3DVisualizer {
.picking_object_id(re_renderer::PickingLayerObjectId(ent_path.hash64()));

let mut point_range_builder =
point_batch.add_points(&positions, &radii, &colors, &picking_instance_ids);
point_batch.add_points(positions, &radii, &colors, picking_instance_ids);

// Determine if there's any sub-ranges that need extra highlighting.
{
Expand Down Expand Up @@ -151,7 +151,7 @@ impl Points3DVisualizer {
if let Some(labels) = data.labels {
self.data.ui_labels.extend(Self::process_labels(
labels,
&positions,
positions,
&instance_path_hashes_for_picking,
&colors,
&annotation_infos,
Expand Down Expand Up @@ -275,13 +275,13 @@ impl VisualizerSystem for Points3DVisualizer {
// ---

#[doc(hidden)] // Public for benchmarks
pub struct LoadedPoints {
pub struct LoadedPoints<'a> {
pub annotation_infos: ResolvedAnnotationInfos,
pub keypoints: Keypoints,
pub positions: Vec<glam::Vec3>,
pub positions: &'a [glam::Vec3],
pub radii: Vec<re_renderer::Size>,
pub colors: Vec<re_renderer::Color32>,
pub picking_instance_ids: Vec<PickingLayerInstanceId>,
pub picking_instance_ids: &'a [PickingLayerInstanceId],
}

#[doc(hidden)] // Public for benchmarks
Expand All @@ -295,10 +295,10 @@ pub struct Points3DComponentData<'a> {
pub class_ids: Option<&'a [Option<ClassId>]>,
}

impl LoadedPoints {
impl<'a> LoadedPoints<'a> {
#[inline]
pub fn load(
data: &Points3DComponentData<'_>,
data: &'a Points3DComponentData<'_>,
ent_path: &EntityPath,
latest_at: TimeInt,
annotations: &Annotations,
Expand All @@ -314,11 +314,12 @@ impl LoadedPoints {
annotations,
);

let (positions, radii, colors, picking_instance_ids) = join4(
|| Self::load_positions(data),
let positions = bytemuck::cast_slice(data.positions);
let picking_instance_ids = bytemuck::cast_slice(data.instance_keys);

let (radii, colors) = rayon::join(
|| Self::load_radii(data, ent_path),
|| Self::load_colors(data, ent_path, &annotation_infos),
|| Self::load_picking_ids(data),
);

Self {
Expand All @@ -331,14 +332,6 @@ impl LoadedPoints {
}
}

#[inline]
pub fn load_positions(
Points3DComponentData { positions, .. }: &Points3DComponentData<'_>,
) -> Vec<glam::Vec3> {
re_tracing::profile_function!();
bytemuck::cast_slice(positions).to_vec()
}

#[inline]
pub fn load_radii(
&Points3DComponentData {
Expand All @@ -357,32 +350,4 @@ impl LoadedPoints {
) -> Vec<re_renderer::Color32> {
crate::visualizers::process_color_slice(colors, ent_path, annotation_infos)
}

#[inline]
pub fn load_picking_ids(
&Points3DComponentData { instance_keys, .. }: &Points3DComponentData<'_>,
) -> Vec<PickingLayerInstanceId> {
re_tracing::profile_function!();
bytemuck::cast_slice(instance_keys).to_vec()
}
}

/// Run 4 things in parallel
fn join4<A: Send, B: Send, C: Send, D: Send>(
a: impl FnOnce() -> A + Send,
b: impl FnOnce() -> B + Send,
c: impl FnOnce() -> C + Send,
d: impl FnOnce() -> D + Send,
) -> (A, B, C, D) {
#[cfg(not(target_arch = "wasm32"))]
{
re_tracing::profile_function!();
let ((a, b), (c, d)) = rayon::join(|| rayon::join(a, b), || rayon::join(c, d));
(a, b, c, d)
}

#[cfg(target_arch = "wasm32")]
{
(a(), b(), c(), d())
}
}

0 comments on commit 289a812

Please sign in to comment.