Dataframe v2: examples (#7817)

Full standalone Rust & Python examples for people who like to learn by jumping straight into the examples/ folder. Simpler snippets and the associated reference page are coming in a future PR.
rerun-io · Oct 19, 2024 · 31d5943 · 31d5943
1 parent 6d86699
commit 31d5943
Show file tree

Hide file tree

Showing 10 changed files with 557 additions and 333 deletions.
diff --git a/Cargo.lock b/Cargo.lock
@@ -1711,6 +1711,15 @@ version = "2.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7e962a19be5cfc3f3bf6dd8f61eb50107f356ad6270fbb3ed41476571db78be5"
 
+[[package]]
+name = "dataframe_query"
+version = "0.19.0-alpha.7"
+dependencies = [
+ "itertools 0.13.0",
+ "rerun",
+ "unindent",
+]
+
 [[package]]
 name = "deranged"
 version = "0.3.11"

diff --git a/examples/manifest.toml b/examples/manifest.toml
@@ -152,10 +152,13 @@ examples = [
 # or explicitly excluded by running `python scripts/check_example_manifest_coverage.py`.
 [ignored]
 examples = [
+  "_empty_rerun_sdk",
+  "all_examples",
   "custom_collection_adapter",
   "custom_data_loader",
   "custom_space_view",
   "custom_store_subscriber",
+  "dataframe_query",
   "drone_lidar",
   "extend_viewer_ui",
   "external_data_loader",
@@ -165,6 +168,4 @@ examples = [
   "spawn_viewer",
   "stdio",
   "template",
-  "all_examples",
-  "_empty_rerun_sdk",
 ]
diff --git a/examples/python/dataframe_query/README.md b/examples/python/dataframe_query/README.md
@@ -0,0 +1,16 @@
+This example will query for the first 10 rows of data in your recording of choice,
+and display the results as a table in your terminal.
+
+You can use one of your recordings, or grab one from our hosted examples, e.g.:
+```bash
+curl 'https://app.rerun.io/version/latest/examples/dna.rrd' -o - > /tmp/dna.rrd
+```
+
+The results can be filtered further by specifying an entity filter expression:
+```bash
+python dataframe_query.py my_recording.rrd /helix/structure/**
+```
+
+```bash
+python dataframe_query.py <path_to_rrd> [entity_path_filter]
+```
diff --git a/examples/python/dataframe_query/dataframe_query.py b/examples/python/dataframe_query/dataframe_query.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+"""Demonstrates basic usage of the dataframe APIs."""
+
+from __future__ import annotations
+
+import argparse
+
+import pyarrow as pa
+import rerun as rr
+
+DESCRIPTION = """
+Usage: python dataframe_query.py <path_to_rrd> [entity_path_filter]
+
+This example will query for the first 10 rows of data in your recording of choice,
+and display the results as a table in your terminal.
+
+You can use one of your recordings, or grab one from our hosted examples, e.g.:
+  curl 'https://app.rerun.io/version/latest/examples/dna.rrd' -o - > /tmp/dna.rrd
+
+The results can be filtered further by specifying an entity filter expression:
+  {bin_name} my_recording.rrd /helix/structure/**
+""".strip()
+
+
+def query(path_to_rrd: str, entity_path_filter: str) -> None:
+    recording = rr.dataframe.load_recording(path_to_rrd)
+    view = recording.view(index="log_time", contents=entity_path_filter)
+    batches = view.select()
+
+    table = pa.Table.from_batches(batches, batches.schema)
+    table = table.slice(0, 10)
+    print(table.to_pandas())
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description=DESCRIPTION)
+    parser.add_argument("path_to_rrd", type=str, help="Path to the .rrd file")
+    parser.add_argument(
+        "entity_path_filter", type=str, nargs="?", default="/**", help="Optional entity path filter expression"
+    )
+    args = parser.parse_args()
+
+    query(args.path_to_rrd, args.entity_path_filter)
+
+
+# Only run the example when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    main()
diff --git a/examples/python/dataframe_query/pyproject.toml b/examples/python/dataframe_query/pyproject.toml
@@ -0,0 +1,13 @@
+[project]
+name = "dataframe_query"
+version = "0.1.0"
+# requires-python = "<3.12"
+readme = "README.md"
+dependencies = ["rerun-sdk"]
+
+[project.scripts]
+dataframe_query = "dataframe_query:main"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
diff --git a/examples/rust/dataframe_query/Cargo.toml b/examples/rust/dataframe_query/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "dataframe_query"
+version = "0.19.0-alpha.7"
+edition = "2021"
+rust-version = "1.79"
+license = "MIT OR Apache-2.0"
+publish = false
+
+[dependencies]
+rerun = { path = "../../../crates/top/rerun", default-features = false, features = [
+  "dataframe",
+] }
+
+itertools = "0.13"
+unindent = "0.2"
diff --git a/examples/rust/dataframe_query/README.md b/examples/rust/dataframe_query/README.md
@@ -0,0 +1,17 @@
+This example will query for the first 10 rows of data in your recording of choice,
+and display the results as a table in your terminal.
+
+```bash
+cargo run --release -- <path_to_rrd> [entity_path_filter]
+```
+
+You can use one of your recordings, or grab one from our hosted examples, e.g.:
+```bash
+curl 'https://app.rerun.io/version/latest/examples/dna.rrd' -o - > /tmp/dna.rrd
+```
+
+The results can be filtered further by specifying an entity filter expression:
+```bash
+cargo run --release -- my_recording.rrd /helix/structure/**
+```
+
diff --git a/examples/rust/dataframe_query/src/main.rs b/examples/rust/dataframe_query/src/main.rs
@@ -0,0 +1,82 @@
+//! Demonstrates basic usage of the dataframe APIs.
+
+use itertools::Itertools;
+
+use rerun::{
+    dataframe::{
+        concatenate_record_batches, EntityPathFilter, QueryCache, QueryEngine, QueryExpression,
+        SparseFillStrategy, Timeline,
+    },
+    ChunkStore, ChunkStoreConfig, StoreKind, VersionPolicy,
+};
+
+/// Loads the `.rrd` file given as the first CLI argument and, for every recording store in it,
+/// prints a table built from the first 10 record batches returned by the query.
+///
+/// The optional second CLI argument is an entity path filter; it defaults to `/**`.
+///
+/// # Errors
+///
+/// Returns an error if the entity path filter cannot be parsed, if the `.rrd` file cannot be
+/// loaded, or if the record batches cannot be concatenated.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let args = std::env::args().collect_vec();
+
+    // Returns the `i`-th CLI argument; if it is missing, prints the usage text to stderr
+    // and exits the process with status 1.
+    let get_arg = |i| {
+        let Some(value) = args.get(i) else {
+            let bin_name = args.first().map_or("$BIN", |s| s.as_str());
+            eprintln!(
+                "{}",
+                unindent::unindent(&format!(
+                    "\
+                    Usage: {bin_name} <path_to_rrd> [entity_path_filter]
+
+                    This example will query for the first 10 rows of data in your recording of choice,
+                    and display the results as a table in your terminal.
+
+                    You can use one of your recordings, or grab one from our hosted examples, e.g.:
+                    curl 'https://app.rerun.io/version/latest/examples/dna.rrd' -o - > /tmp/dna.rrd
+
+                    The results can be filtered further by specifying an entity filter expression:
+                    {bin_name} my_recording.rrd /helix/structure/**\
+                    ",
+                )),
+            );
+            std::process::exit(1);
+        };
+        value
+    };
+
+    let path_to_rrd = get_arg(1);
+    let entity_path_filter = EntityPathFilter::try_from(args.get(2).map_or("/**", |s| s.as_str()))?;
+    let timeline = Timeline::log_time();
+
+    let stores = ChunkStore::from_rrd_filepath(
+        &ChunkStoreConfig::DEFAULT,
+        path_to_rrd,
+        VersionPolicy::Warn,
+    )?;
+
+    for (store_id, store) in &stores {
+        // Only recording stores are queried; every other store kind is skipped.
+        if store_id.kind != StoreKind::Recording {
+            continue;
+        }
+
+        let query_cache = QueryCache::new(store);
+        let query_engine = QueryEngine {
+            store,
+            cache: &query_cache,
+        };
+
+        let query = QueryExpression {
+            filtered_index: Some(timeline),
+            // Select every entity path of this store that matches the filter.
+            view_contents: Some(
+                query_engine
+                    .iter_entity_paths(&entity_path_filter)
+                    .map(|entity_path| (entity_path, None))
+                    .collect(),
+            ),
+            sparse_fill_strategy: SparseFillStrategy::LatestAtGlobal,
+            ..Default::default()
+        };
+
+        // `query` is not used again, so it is moved into the engine instead of being cloned.
+        let query_handle = query_engine.query(query);
+        let record_batches = query_handle.batch_iter().take(10).collect_vec();
+
+        let table = concatenate_record_batches(query_handle.schema().clone(), &record_batches)?;
+        println!("{table}");
+    }
+
+    Ok(())
+}