Skip to content

Commit

Permalink
Introduce rerun compare to check whether 2 rrd files are functional…
Browse files Browse the repository at this point in the history
…ly equivalent (#2597)

1. Run some example and save the result to an .rrd file:
```
$ ./examples/python/clock/main.py --steps 50 --save examples/python/clock/out1.rrd
```

2. Do it again:
```
$ ./examples/python/clock/main.py --steps 50 --save examples/python/clock/out2.rrd
```

3. Compare these two rrd files:
```
$ cargo r -p rerun-cli --quiet -- compare examples/python/clock/out1.rrd examples/python/clock/out2.rrd

$ echo $?
0
```

4. Modify the example slightly, and save it to a third rrd file:
```
$ vim ./examples/python/clock/main.py

$ git diff
-        color_m = (int(255 - (scaled_m * 255)), int(scaled_m * 255), 128, 128)
+        color_m = (int(255 - (scaled_m * 255)), int(scaled_m * 250), 128, 128)

$ ./examples/python/clock/main.py --steps 50 --save examples/python/clock/out3.rrd
```

5. Compare the first and third .rrd files:
```
$ cargo r -p rerun-cli --quiet -- compare examples/python/clock/out1.rrd examples/python/clock/out3.rrd
```

![image](https://github.com/rerun-io/rerun/assets/2910679/70be3ec3-cf4f-4584-affc-46b72154fc9b)
```
$ echo $?
1
```

---

### Checklist
* [x] I have read and agree to [Contributor
Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and
the [Code of
Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested [demo.rerun.io](https://demo.rerun.io/pr/2597) (if
applicable)

- [PR Build Summary](https://build.rerun.io/pr/2597)
- [Docs preview](https://rerun.io/preview/pr%3Acmc%2Fcompare_rrd/docs)
- [Examples
preview](https://rerun.io/preview/pr%3Acmc%2Fcompare_rrd/examples)
  • Loading branch information
teh-cmc authored Jul 5, 2023
1 parent 2569eb7 commit caba397
Show file tree
Hide file tree
Showing 5 changed files with 197 additions and 3 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ members = [
"docs/code-examples",
"rerun_py",
"run_wasm",
"tests/rust/*",
"tests/rust/test_*",
]

[workspace.package]
Expand Down
2 changes: 2 additions & 0 deletions crates/re_log_types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ re_tuid = { workspace = true, features = ["arrow2_convert"] }

# External
ahash.workspace = true
anyhow.workspace = true
arrow2 = { workspace = true, features = [
"io_ipc",
"io_print",
Expand All @@ -53,6 +54,7 @@ web-time.workspace = true
nohash-hasher = "0.2"
num-derive = "0.3"
num-traits = "0.2"
similar-asserts = "1.4.2"
smallvec.workspace = true
thiserror.workspace = true
time = { workspace = true, features = ["formatting", "macros"] }
Expand Down
99 changes: 99 additions & 0 deletions crates/re_log_types/src/data_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1057,3 +1057,102 @@ impl std::fmt::Display for DataTable {
.fmt(f)
}
}

impl DataTable {
    /// Determines whether two [`DataTable`]s are _similar_: they do not have to match
    /// byte-for-byte, only to be functionally equivalent.
    ///
    /// Every row is split into one single-timeline row per timeline found in its timepoint,
    /// and the resulting rows are compared timeline by timeline after being sorted by
    /// `(timepoint, row_id)`. Row ids themselves and the `log_time` timeline are left out of
    /// the comparison.
    ///
    /// Returns `Ok(())` when the tables match, or an error describing the first discrepancy
    /// found (including a textual diff of the offending rows) otherwise.
    pub fn similar(table1: &DataTable, table2: &DataTable) -> anyhow::Result<()> {
        /// Groups the rows of `table` by timeline.
        ///
        /// A row is duplicated once per timeline present in its timepoint, and each copy has
        /// its timepoint narrowed down to that single timeline.
        fn compute_rows(table: &DataTable) -> HashMap<Timeline, Vec<DataRow>> {
            let mut grouped: HashMap<Timeline, Vec<DataRow>> = Default::default();

            for row in table.to_rows() {
                for (timeline, time) in row.timepoint.iter() {
                    let mut single_timeline_row = row.clone();
                    single_timeline_row.timepoint = TimePoint::from([(*timeline, *time)]);
                    grouped
                        .entry(*timeline)
                        .or_default()
                        .push(single_timeline_row);
                }
            }

            grouped
        }

        let mut grouped1 = compute_rows(table1);
        let mut grouped2 = compute_rows(table2);

        // Both tables must cover the exact same set of timelines.
        if let Some(timeline1) = grouped1
            .keys()
            .find(|timeline| !grouped2.contains_key(*timeline))
        {
            anyhow::bail!(
                "timeline {timeline1:?} was present in the first rrd file but not in the second"
            );
        }
        if let Some(timeline2) = grouped2
            .keys()
            .find(|timeline| !grouped1.contains_key(*timeline))
        {
            anyhow::bail!(
                "timeline {timeline2:?} was present in the second rrd file but not in the first"
            );
        }

        // NOTE: `log_time` cannot be compared, by definition.
        grouped1.remove(&Timeline::log_time());
        grouped2.remove(&Timeline::log_time());

        // NOTE: Both sets of rows must follow a common natural order for the pairwise
        // comparison below to make sense.
        let natural_order = |row: &DataRow| (row.timepoint.clone(), row.row_id);

        for (timeline, rows1) in &mut grouped1 {
            // Cannot fail: both maps were verified above to hold the same timelines.
            let rows2 = grouped2.get_mut(timeline).unwrap();

            rows1.sort_by_key(natural_order);
            rows2.sort_by_key(natural_order);

            if rows1.len() != rows2.len() {
                anyhow::bail!(
                    "rrd files yielded different number of datastore rows for timeline {timeline:?}: {} vs. {}",
                    rows1.len(),
                    rows2.len()
                );
            }

            for (ri, (row1, row2)) in rows1.iter().zip(rows2.iter()).enumerate() {
                // Exhaustive destructuring on purpose: adding a field to `DataRow` forces a
                // decision on whether it takes part in the comparison.
                let DataRow {
                    row_id: _,
                    timepoint: timepoint1,
                    entity_path: entity_path1,
                    num_instances: num_instances1,
                    cells: cells1,
                } = row1;
                let DataRow {
                    row_id: _,
                    timepoint: timepoint2,
                    entity_path: entity_path2,
                    num_instances: num_instances2,
                    cells: cells2,
                } = row2;

                let rows_match = timepoint1 == timepoint2
                    && entity_path1 == entity_path2
                    && num_instances1 == num_instances2
                    && cells1 == cells2;

                if !rows_match {
                    anyhow::bail!(
                        "Found discrepancy in row #{ri}:\n{}",
                        similar_asserts::SimpleDiff::from_str(
                            &row1.to_string(),
                            &row2.to_string(),
                            "row1",
                            "row2"
                        )
                    );
                }
            }
        }

        Ok(())
    }
}
96 changes: 94 additions & 2 deletions crates/rerun/src/run.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::path::{Path, PathBuf};

use itertools::Itertools;
use re_log_types::{LogMsg, PythonVersion};
use re_smart_channel::{Receiver, SmartMessagePayload};
Expand Down Expand Up @@ -149,6 +151,19 @@ enum Commands {
#[cfg(all(feature = "analytics"))]
#[command(subcommand)]
Analytics(AnalyticsCommands),

/// Compares the data between 2 .rrd files, returning a successful shell exit code if they
/// match.
///
/// This ignores the `log_time` timeline.
Compare {
path_to_rrd1: String,
path_to_rrd2: String,

/// If specified, dumps both .rrd files as tables.
#[clap(long, default_value_t = false)]
full_dump: bool,
},
}

#[derive(Debug, Clone, Subcommand)]
Expand Down Expand Up @@ -254,8 +269,16 @@ where
match commands {
#[cfg(all(feature = "analytics"))]
Commands::Analytics(analytics) => run_analytics(analytics).map_err(Into::into),
#[cfg(not(all(feature = "analytics")))]
_ => Ok(()),

Commands::Compare {
path_to_rrd1,
path_to_rrd2,
full_dump,
} => {
let path_to_rrd1 = PathBuf::from(path_to_rrd1);
let path_to_rrd2 = PathBuf::from(path_to_rrd2);
run_compare(&path_to_rrd1, &path_to_rrd2, *full_dump)
}
}
} else {
run_impl(build_info, call_source, args).await
Expand All @@ -280,6 +303,75 @@ where
}
}

/// Checks whether two .rrd files are _similar_, i.e. not equal on a byte-level but
/// functionally equivalent.
///
/// Returns `Ok(())` if they match, or an error containing a detailed diff otherwise.
fn run_compare(path_to_rrd1: &Path, path_to_rrd2: &Path, full_dump: bool) -> anyhow::Result<()> {
    /// Given a path to an rrd file, builds up a `StoreDb` per store id found in it and
    /// returns the contents of its data recording as one big `DataTable`.
    ///
    /// Fails if the file cannot be opened or decoded, or if it does not contain exactly one
    /// data recording.
    fn compute_uber_table(path_to_rrd: &Path) -> anyhow::Result<re_log_types::DataTable> {
        use re_data_store::StoreDb;
        use re_log_types::StoreId;

        let rrd_file = std::fs::File::open(path_to_rrd)
            .with_context(|| format!("couldn't open rrd file contents at {path_to_rrd:?}"))?;

        let mut stores: std::collections::HashMap<StoreId, StoreDb> = Default::default();
        let decoder = re_log_encoding::decoder::Decoder::new(rrd_file)?;
        for msg in decoder {
            let msg = msg
                .with_context(|| format!("couldn't decode rrd file contents at {path_to_rrd:?}"))?;
            stores
                .entry(msg.store_id().clone())
                // Only build a new `StoreDb` the first time a store id shows up, rather than
                // constructing (and immediately dropping) one for every single message.
                .or_insert_with(|| StoreDb::new(msg.store_id().clone()))
                .add(&msg)
                .with_context(|| format!("couldn't decode rrd file contents at {path_to_rrd:?}"))?;
        }

        // Only data recordings take part in the comparison; other store kinds are ignored.
        let mut stores = stores
            .values()
            .filter(|store| store.store_kind() == re_log_types::StoreKind::Recording)
            .collect_vec();

        anyhow::ensure!(
            !stores.is_empty(),
            "no data recording found in rrd file at {path_to_rrd:?}"
        );
        anyhow::ensure!(
            stores.len() == 1,
            "more than one data recording found in rrd file at {path_to_rrd:?}"
        );

        let store = stores.pop().unwrap(); // safe, exactly one element ensured above

        let table = re_log_types::DataTable::from_rows(re_log_types::TableId::random(), {
            let mut rows = store
                .store()
                .to_data_tables(None)
                .flat_map(|t| t.to_rows().collect_vec())
                .collect_vec();
            // NOTE: So the full dump makes sense, if enabled.
            rows.sort_by_key(|row| (row.timepoint.clone(), row.row_id));
            rows
        });

        Ok(table)
    }

    let table1 = compute_uber_table(path_to_rrd1)?;
    let table2 = compute_uber_table(path_to_rrd2)?;

    // Optionally print both tables in full before comparing them.
    if full_dump {
        println!("{table1}");
        println!("{table2}");
    }

    re_log_types::DataTable::similar(&table1, &table2)
}

#[cfg(all(feature = "analytics"))]
fn run_analytics(cmd: &AnalyticsCommands) -> Result<(), re_analytics::cli::CliError> {
match cmd {
Expand Down

0 comments on commit caba397

Please sign in to comment.