Skip to content

Commit

Permalink
Dataframe v2: examples (#7817)
Browse files Browse the repository at this point in the history
Full standalone Rust & Python examples for people who like to learn by
jumping straight into the examples/ folder.

Simpler snippets and the associated reference page are coming in a
future PR.
  • Loading branch information
teh-cmc authored Oct 19, 2024
1 parent 6d86699 commit 31d5943
Show file tree
Hide file tree
Showing 10 changed files with 557 additions and 333 deletions.
9 changes: 9 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1711,6 +1711,15 @@ version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e962a19be5cfc3f3bf6dd8f61eb50107f356ad6270fbb3ed41476571db78be5"

[[package]]
name = "dataframe_query"
version = "0.19.0-alpha.7"
dependencies = [
"itertools 0.13.0",
"rerun",
"unindent",
]

[[package]]
name = "deranged"
version = "0.3.11"
Expand Down
5 changes: 3 additions & 2 deletions examples/manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,13 @@ examples = [
# or explicitly excluded by running `python scripts/check_example_manifest_coverage.py`.
[ignored]
examples = [
"_empty_rerun_sdk",
"all_examples",
"custom_collection_adapter",
"custom_data_loader",
"custom_space_view",
"custom_store_subscriber",
"dataframe_query",
"drone_lidar",
"extend_viewer_ui",
"external_data_loader",
Expand All @@ -165,6 +168,4 @@ examples = [
"spawn_viewer",
"stdio",
"template",
"all_examples",
"_empty_rerun_sdk",
]
16 changes: 16 additions & 0 deletions examples/python/dataframe_query/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
This example will query for the first 10 rows of data in your recording of choice,
and display the results as a table in your terminal.

You can use one of your recordings, or grab one from our hosted examples, e.g.:
```bash
curl 'https://app.rerun.io/version/latest/examples/dna.rrd' -o - > /tmp/dna.rrd
```

The results can be filtered further by specifying an entity filter expression:
```bash
python dataframe_query.py my_recording.rrd /helix/structure/**
```

```bash
python dataframe_query.py <path_to_rrd> [entity_path_filter]
```
47 changes: 47 additions & 0 deletions examples/python/dataframe_query/dataframe_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env python3
"""Demonstrates basic usage of the dataframe APIs."""

from __future__ import annotations

import argparse

import pyarrow as pa
import rerun as rr

# Help text handed to argparse as the program description.
# This is a plain (non-formatted) string, so it must not contain `{...}`
# placeholders: they would be shown to the user verbatim.
DESCRIPTION = """
Usage: python dataframe_query.py <path_to_rrd> [entity_path_filter]
This example will query for the first 10 rows of data in your recording of choice,
and display the results as a table in your terminal.
You can use one of your recordings, or grab one from our hosted examples, e.g.:
curl 'https://app.rerun.io/version/latest/examples/dna.rrd' -o - > /tmp/dna.rrd
The results can be filtered further by specifying an entity filter expression:
python dataframe_query.py my_recording.rrd /helix/structure/**
""".strip()


def query(path_to_rrd: str, entity_path_filter: str) -> None:
    """
    Print the first 10 rows of a recording as a pandas table.

    Parameters
    ----------
    path_to_rrd:
        Path of the `.rrd` file to load.
    entity_path_filter:
        Entity path filter expression, passed as the `contents` of the view.

    """
    # Load the recording, view it along the `log_time` index restricted to the
    # requested contents, and select the resulting batches.
    reader = (
        rr.dataframe.load_recording(path_to_rrd)
        .view(index="log_time", contents=entity_path_filter)
        .select()
    )

    # Assemble the batches into a single table and keep only rows [0, 10).
    first_rows = pa.Table.from_batches(reader, reader.schema).slice(0, 10)
    print(first_rows.to_pandas())


def main() -> None:
    """Parse the command-line arguments and run the query on the given recording."""
    parser = argparse.ArgumentParser(
        description=DESCRIPTION,
        # The description is pre-formatted, multi-line text that contains shell
        # commands; the default formatter would re-wrap it into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("path_to_rrd", type=str, help="Path to the .rrd file")
    # Optional positional argument: when omitted, fall back to the `/**` filter.
    parser.add_argument(
        "entity_path_filter", type=str, nargs="?", default="/**", help="Optional entity path filter expression"
    )
    args = parser.parse_args()

    query(args.path_to_rrd, args.entity_path_filter)


if __name__ == "__main__":
    main()
13 changes: 13 additions & 0 deletions examples/python/dataframe_query/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[project]
name = "dataframe_query"
version = "0.1.0"
# requires-python = "<3.12"
readme = "README.md"
dependencies = ["rerun-sdk"]

[project.scripts]
dataframe_query = "dataframe_query:main"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
15 changes: 15 additions & 0 deletions examples/rust/dataframe_query/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[package]
name = "dataframe_query"
version = "0.19.0-alpha.7"
edition = "2021"
rust-version = "1.79"
license = "MIT OR Apache-2.0"
publish = false

[dependencies]
rerun = { path = "../../../crates/top/rerun", default-features = false, features = [
"dataframe",
] }

itertools = "0.13"
unindent = "0.2"
17 changes: 17 additions & 0 deletions examples/rust/dataframe_query/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
This example will query for the first 10 rows of data in your recording of choice,
and display the results as a table in your terminal.

```bash
cargo run --release -- <path_to_rrd> [entity_path_filter]
```

You can use one of your recordings, or grab one from our hosted examples, e.g.:
```bash
curl 'https://app.rerun.io/version/latest/examples/dna.rrd' -o - > /tmp/dna.rrd
```

The results can be filtered further by specifying an entity filter expression:
```bash
cargo run --release -- my_recording.rrd /helix/structure/**
```

82 changes: 82 additions & 0 deletions examples/rust/dataframe_query/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
//! Demonstrates basic usage of the dataframe APIs.
use itertools::Itertools;

use rerun::{
dataframe::{
concatenate_record_batches, EntityPathFilter, QueryCache, QueryEngine, QueryExpression,
SparseFillStrategy, Timeline,
},
ChunkStore, ChunkStoreConfig, StoreKind, VersionPolicy,
};

/// Loads an `.rrd` file, runs a dataframe query against every recording store it
/// contains, and prints the resulting table(s) to stdout.
///
/// Command line: `<path_to_rrd> [entity_path_filter]`. When the path is missing,
/// the usage text is printed to stderr and the process exits with status 1. When
/// the filter is missing, `/**` is used.
///
/// # Errors
///
/// Propagates failures from parsing the entity path filter, from loading the
/// `.rrd` file, and from concatenating the record batches.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let args = std::env::args().collect_vec();

    // The recording path is mandatory: without it, show the usage text and bail out.
    let Some(path_to_rrd) = args.get(1) else {
        let bin_name = args.first().map_or("$BIN", |s| s.as_str());
        eprintln!(
            "{}",
            unindent::unindent(&format!(
                "\
                Usage: {bin_name} <path_to_rrd> [entity_path_filter]
                This example will query for the first 10 rows of data in your recording of choice,
                and display the results as a table in your terminal.
                You can use one of your recordings, or grab one from our hosted examples, e.g.:
                curl 'https://app.rerun.io/version/latest/examples/dna.rrd' -o - > /tmp/dna.rrd
                The results can be filtered further by specifying an entity filter expression:
                {bin_name} my_recording.rrd /helix/structure/**\
                ",
            )),
        );
        std::process::exit(1);
    };

    // The filter is optional and defaults to `/**`.
    let entity_path_filter = EntityPathFilter::try_from(args.get(2).map_or("/**", |s| s.as_str()))?;
    let timeline = Timeline::log_time();

    let stores = ChunkStore::from_rrd_filepath(
        &ChunkStoreConfig::DEFAULT,
        path_to_rrd,
        VersionPolicy::Warn,
    )?;

    for (store_id, store) in &stores {
        // Only recording stores are queried; every other store kind is skipped.
        if store_id.kind != StoreKind::Recording {
            continue;
        }

        let query_cache = QueryCache::new(store);
        let query_engine = QueryEngine {
            store,
            cache: &query_cache,
        };

        // Query along the log-time timeline, restricted to the entity paths that
        // match the filter. The `None` paired with each path presumably means
        // "no per-entity component restriction" (TODO confirm against the API docs).
        let query = QueryExpression {
            filtered_index: Some(timeline),
            view_contents: Some(
                query_engine
                    .iter_entity_paths(&entity_path_filter)
                    .map(|entity_path| (entity_path, None))
                    .collect(),
            ),
            sparse_fill_strategy: SparseFillStrategy::LatestAtGlobal,
            ..Default::default()
        };

        // `query` is not used again, so it is moved into the engine rather than cloned.
        let query_handle = query_engine.query(query);

        // Keep at most the first 10 batches produced by the iterator (the usage
        // text describes these as the first 10 rows).
        let record_batches = query_handle.batch_iter().take(10).collect_vec();

        let table = concatenate_record_batches(query_handle.schema().clone(), &record_batches)?;
        println!("{table}");
    }

    Ok(())
}
Loading

0 comments on commit 31d5943

Please sign in to comment.