diff --git a/Cargo.lock b/Cargo.lock
index 7ce0582da054..f1800979efac 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4126,6 +4126,7 @@ name = "re_log_types"
 version = "0.8.0-alpha.0"
 dependencies = [
  "ahash 0.8.3",
+ "anyhow",
  "arrow2",
  "arrow2_convert",
  "crossbeam",
diff --git a/Cargo.toml b/Cargo.toml
index 448375a5398a..17d93fcf4703 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,7 +6,7 @@ members = [
   "docs/code-examples",
   "rerun_py",
   "run_wasm",
-  "tests/rust/*",
+  "tests/rust/test_*",
 ]
 
 [workspace.package]
diff --git a/crates/re_log_types/Cargo.toml b/crates/re_log_types/Cargo.toml
index 402788957dd9..1ac2e3c927ff 100644
--- a/crates/re_log_types/Cargo.toml
+++ b/crates/re_log_types/Cargo.toml
@@ -40,6 +40,7 @@ re_tuid = { workspace = true, features = ["arrow2_convert"] }
 
 # External
 ahash.workspace = true
+anyhow.workspace = true
 arrow2 = { workspace = true, features = [
   "io_ipc",
   "io_print",
@@ -53,6 +54,7 @@ web-time.workspace = true
 nohash-hasher = "0.2"
 num-derive = "0.3"
 num-traits = "0.2"
+similar-asserts = "1.4.2"
 smallvec.workspace = true
 thiserror.workspace = true
 time = { workspace = true, features = ["formatting", "macros"] }
diff --git a/crates/re_log_types/src/data_table.rs b/crates/re_log_types/src/data_table.rs
index 4267dadc34c9..38bf498b84ad 100644
--- a/crates/re_log_types/src/data_table.rs
+++ b/crates/re_log_types/src/data_table.rs
@@ -1057,3 +1057,110 @@ impl std::fmt::Display for DataTable {
             .fmt(f)
     }
 }
+
+impl DataTable {
+    /// Checks whether two [`DataTable`]s are _similar_, i.e. not equal on a byte-level but
+    /// functionally equivalent.
+    ///
+    /// The `log_time` timeline is ignored entirely, and `row_id`s are only used to order rows,
+    /// never compared for equality.
+    ///
+    /// Returns `Ok(())` if they match, or an error containing a detailed diff otherwise.
+    pub fn similar(table1: &DataTable, table2: &DataTable) -> anyhow::Result<()> {
+        /// Given a [`DataTable`], returns all of its rows grouped by timeline.
+        ///
+        /// A row that lives on several timelines is duplicated once per timeline, each copy
+        /// keeping only that timeline in its timepoint.
+        fn compute_rows(table: &DataTable) -> HashMap<Timeline, Vec<DataRow>> {
+            let mut rows_by_timeline: HashMap<Timeline, Vec<DataRow>> = Default::default();
+
+            let rows = table.to_rows().flat_map(|row| {
+                row.timepoint
+                    .iter()
+                    .map(|(timeline, time)| {
+                        let mut row = row.clone();
+                        row.timepoint = TimePoint::from([(*timeline, *time)]);
+                        (*timeline, row)
+                    })
+                    .collect_vec()
+            });
+
+            for (timeline, row) in rows {
+                rows_by_timeline.entry(timeline).or_default().push(row);
+            }
+
+            rows_by_timeline
+        }
+
+        let mut rows_by_timeline1 = compute_rows(table1);
+        let mut rows_by_timeline2 = compute_rows(table2);
+
+        // NOTE: Can't compare `log_time`, by definition. Drop it before checking which timelines
+        // are present, so that it cannot cause a mismatch on its own.
+        rows_by_timeline1.remove(&Timeline::log_time());
+        rows_by_timeline2.remove(&Timeline::log_time());
+
+        for timeline1 in rows_by_timeline1.keys() {
+            anyhow::ensure!(
+                rows_by_timeline2.contains_key(timeline1),
+                "timeline {timeline1:?} was present in the first rrd file but not in the second",
+            );
+        }
+        for timeline2 in rows_by_timeline2.keys() {
+            anyhow::ensure!(
+                rows_by_timeline1.contains_key(timeline2),
+                "timeline {timeline2:?} was present in the second rrd file but not in the first",
+            );
+        }
+
+        for (timeline, rows1) in &mut rows_by_timeline1 {
+            let rows2 = rows_by_timeline2.get_mut(timeline).unwrap(); // safe, checked above
+
+            // NOTE: We need both sets of rows to follow a common natural order for the comparison
+            // to make sense.
+            rows1.sort_by_key(|row| (row.timepoint.clone(), row.row_id));
+            rows2.sort_by_key(|row| (row.timepoint.clone(), row.row_id));
+
+            anyhow::ensure!(
+                rows1.len() == rows2.len(),
+                "rrd files yielded different number of datastore rows for timeline {timeline:?}: {} vs. {}",
+                rows1.len(),
+                rows2.len()
+            );
+
+            for (ri, (row1, row2)) in rows1.iter().zip(rows2).enumerate() {
+                // NOTE: `row_id`s are deliberately left out of the comparison.
+                let DataRow {
+                    row_id: _,
+                    timepoint: timepoint1,
+                    entity_path: entity_path1,
+                    num_instances: num_instances1,
+                    cells: cells1,
+                } = row1;
+                let DataRow {
+                    row_id: _,
+                    timepoint: timepoint2,
+                    entity_path: entity_path2,
+                    num_instances: num_instances2,
+                    cells: cells2,
+                } = row2;
+
+                anyhow::ensure!(
+                    timepoint1 == timepoint2
+                        && entity_path1 == entity_path2
+                        && num_instances1 == num_instances2
+                        && cells1 == cells2,
+                    "Found discrepancy in row #{ri} of timeline {timeline:?}:\n{}",
+                    similar_asserts::SimpleDiff::from_str(
+                        &row1.to_string(),
+                        &row2.to_string(),
+                        "row1",
+                        "row2"
+                    )
+                );
+            }
+        }
+
+        Ok(())
+    }
+}
diff --git a/crates/rerun/src/run.rs b/crates/rerun/src/run.rs
index 9a68485e9995..c2e673a1ce8a 100644
--- a/crates/rerun/src/run.rs
+++ b/crates/rerun/src/run.rs
@@ -1,3 +1,5 @@
+use std::path::{Path, PathBuf};
+
 use itertools::Itertools;
 use re_log_types::{LogMsg, PythonVersion};
 use re_smart_channel::{Receiver, SmartMessagePayload};
@@ -149,6 +151,19 @@ enum Commands {
     #[cfg(all(feature = "analytics"))]
     #[command(subcommand)]
     Analytics(AnalyticsCommands),
+
+    /// Compares the data between 2 .rrd files, returning a successful shell exit code if they
+    /// match.
+    ///
+    /// This ignores the `log_time` timeline.
+    Compare {
+        path_to_rrd1: String,
+        path_to_rrd2: String,
+
+        /// If specified, dumps both .rrd files as tables.
+        #[clap(long, default_value_t = false)]
+        full_dump: bool,
+    },
 }
 
 #[derive(Debug, Clone, Subcommand)]
@@ -254,8 +269,16 @@ where
         match commands {
             #[cfg(all(feature = "analytics"))]
             Commands::Analytics(analytics) => run_analytics(analytics).map_err(Into::into),
-            #[cfg(not(all(feature = "analytics")))]
-            _ => Ok(()),
+
+            Commands::Compare {
+                path_to_rrd1,
+                path_to_rrd2,
+                full_dump,
+            } => {
+                let path_to_rrd1 = PathBuf::from(path_to_rrd1);
+                let path_to_rrd2 = PathBuf::from(path_to_rrd2);
+                run_compare(&path_to_rrd1, &path_to_rrd2, *full_dump)
+            }
         }
     } else {
         run_impl(build_info, call_source, args).await
@@ -280,6 +303,78 @@ where
     }
 }
 
+/// Checks whether two .rrd files are _similar_, i.e. not equal on a byte-level but
+/// functionally equivalent.
+///
+/// Returns `Ok(())` if they match, or an error containing a detailed diff otherwise.
+fn run_compare(path_to_rrd1: &Path, path_to_rrd2: &Path, full_dump: bool) -> anyhow::Result<()> {
+    /// Given a path to an rrd file, builds up a `DataStore` and returns its contents as one big
+    /// `DataTable`.
+    ///
+    /// Fails unless exactly one data recording is present in the rrd file.
+    fn compute_uber_table(path_to_rrd: &Path) -> anyhow::Result<re_log_types::DataTable> {
+        use re_data_store::StoreDb;
+        use re_log_types::StoreId;
+
+        let rrd_file = std::fs::File::open(path_to_rrd)
+            .with_context(|| format!("couldn't open rrd file contents at {path_to_rrd:?}"))?;
+
+        let mut stores: std::collections::HashMap<StoreId, StoreDb> = Default::default();
+        let decoder = re_log_encoding::decoder::Decoder::new(rrd_file)?;
+        for msg in decoder {
+            let msg = msg
+                .with_context(|| format!("couldn't decode rrd file contents at {path_to_rrd:?}"))?;
+            stores
+                .entry(msg.store_id().clone())
+                // NOTE: Only build a new `StoreDb` the first time a store id is seen.
+                .or_insert_with(|| StoreDb::new(msg.store_id().clone()))
+                .add(&msg)
+                .with_context(|| {
+                    format!("couldn't ingest message decoded from rrd file at {path_to_rrd:?}")
+                })?;
+        }
+
+        let mut stores = stores
+            .values()
+            .filter(|store| store.store_kind() == re_log_types::StoreKind::Recording)
+            .collect_vec();
+
+        anyhow::ensure!(
+            !stores.is_empty(),
+            "no data recording found in rrd file at {path_to_rrd:?}"
+        );
+        anyhow::ensure!(
+            stores.len() == 1,
+            "more than one data recording found in rrd file at {path_to_rrd:?}"
+        );
+
+        let store = stores.pop().unwrap(); // safe, ensured above
+
+        let table = re_log_types::DataTable::from_rows(re_log_types::TableId::random(), {
+            let mut rows = store
+                .store()
+                .to_data_tables(None)
+                .flat_map(|t| t.to_rows().collect_vec())
+                .collect_vec();
+            // NOTE: So the full dump makes sense, if enabled.
+            rows.sort_by_key(|row| (row.timepoint.clone(), row.row_id));
+            rows
+        });
+
+        Ok(table)
+    }
+
+    let table1 = compute_uber_table(path_to_rrd1)?;
+    let table2 = compute_uber_table(path_to_rrd2)?;
+
+    if full_dump {
+        println!("{table1}");
+        println!("{table2}");
+    }
+
+    re_log_types::DataTable::similar(&table1, &table2)
+}
+
 #[cfg(all(feature = "analytics"))]
 fn run_analytics(cmd: &AnalyticsCommands) -> Result<(), re_analytics::cli::CliError> {
     match cmd {