Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce AnyValues as an alternative to extension_components #3561

Merged
merged 11 commits into from
Oct 2, 2023
12 changes: 12 additions & 0 deletions docs/code-examples/any_values.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""Log arbitrary data."""
import rerun as rr

rr.init("rerun_example_any_values", spawn=True)

# Each keyword argument becomes its own component, named after the key.
rr.log(
    "any_values",
    rr.AnyValues(
        foo=[1.2, 3.4, 5.6],
        bar="hello world",
    ),
)
1 change: 1 addition & 0 deletions docs/code-examples/roundtrips.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
opt_out_entirely = {
"annotation_context_connections": ["cpp"],
"annotation_context_segmentation": ["cpp"],
"any_values": ["cpp", "rust"], # Only implemented for Python
"asset3d_out_of_tree": ["cpp"], # TODO(cmc): cannot set recording clock in cpp at the moment
"asset3d_simple": ["cpp"], # TODO(#2919): Need log_timeless for C++
"bar_chart": ["cpp"],
Expand Down
2 changes: 2 additions & 0 deletions rerun_py/rerun_sdk/rerun/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
__all__ = [
"AnnotationContext",
"AnnotationInfo",
"AnyValues",
"Arrows3D",
"AsComponents",
"Asset3D",
Expand Down Expand Up @@ -122,6 +123,7 @@

from ._image import ImageEncoded, ImageFormat
from ._log import AsComponents, ComponentBatchLike, IndicatorComponentBatch, log, log_components
from .any_value import AnyValues
from .archetypes import (
AnnotationContext,
Arrows3D,
Expand Down
149 changes: 149 additions & 0 deletions rerun_py/rerun_sdk/rerun/any_value.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
from __future__ import annotations

from typing import Any, Iterable

import numpy as np
import pyarrow as pa

from ._log import AsComponents, ComponentBatchLike
from .error_utils import _send_warning

# Remembers, per component name, the `(numpy dtype, pyarrow type)` pair observed the first
# time that name was converted, so later values logged under the same name are coerced to it.
ANY_VALUE_TYPE_REGISTRY: dict[str, Any] = {}

# Prepended to the user-supplied name to build the full component name.
COMPONENT_PREFIX = "user.components."


class AnyBatchValue(ComponentBatchLike):
    """
    Helper to log arbitrary data as a component batch.

    This is a very simple helper that implements the `ComponentBatchLike` interface on top
    of the `pyarrow` library array conversion functions.

    See also [rerun.AnyValues][].
    """

    def __init__(self, name: str, value: Any) -> None:
        """
        Construct a new AnyBatchValue.

        The component will be named "user.components.<NAME>".

        The value will be attempted to be converted into an arrow array by first calling
        the `as_arrow_array()` method if it's defined. All Rerun Batch datatypes implement
        this function so it's possible to pass them directly to AnyValues.

        If the object doesn't implement `as_arrow_array()`, it is first turned into an
        at-least-1D numpy array, which is then passed as an argument to
        [pyarrow.array](https://arrow.apache.org/docs/python/generated/pyarrow.array.html).

        Note: rerun requires that a given component only take on a single type.
        The first type logged will be the type that is used for all future logs
        of that component. The API will make a best effort to do type conversion
        if supported by numpy and arrow. Any components that can't be converted
        will be dropped, and a warning will be sent to the log.

        If you want to inspect how your component will be converted to the
        underlying arrow code, the following snippet is what is happening
        internally:

        ```
        np_value = np.atleast_1d(np.asarray(value))
        pa_value = pa.array(np_value)
        ```

        Parameters
        ----------
        name:
            The name of the component.
        value:
            The data to be logged as a component.
        """
        # Types recorded the first time this component name was converted, if any.
        np_type, pa_type = ANY_VALUE_TYPE_REGISTRY.get(name, (None, None))

        self.name = name
        # Stays `None` when conversion fails or there is nothing to convert; see `is_valid()`.
        self.pa_array: pa.Array | None = None

        try:
            if hasattr(value, "as_arrow_array"):
                # The object knows how to convert itself; no type is registered for it.
                self.pa_array = value.as_arrow_array()
            elif np_type is not None:
                # Known component: coerce to the previously registered types.
                if value is None:
                    value = []
                # `np.asarray` only copies when it has to. `np.array(..., copy=False)` would
                # raise on NumPy >= 2.0 whenever a copy is unavoidable (e.g. for a list).
                np_value = np.atleast_1d(np.asarray(value, dtype=np_type))
                self.pa_array = pa.array(np_value, type=pa_type)
            elif value is None:
                _send_warning(f"AnyValues '{name}' of unknown type has no data. Ignoring.", 1)
            else:
                # First sighting of this component: infer the types and remember them.
                np_value = np.atleast_1d(np.asarray(value))
                self.pa_array = pa.array(np_value)
                ANY_VALUE_TYPE_REGISTRY[name] = (np_value.dtype, self.pa_array.type)

        except Exception as ex:
            # Deliberate best effort: report the failure as a warning and leave the batch
            # invalid instead of propagating the error to the caller.
            _send_warning(
                f"Error converting data to arrow for AnyValues '{name}'. Ignoring.\n{type(ex).__name__}: {ex}",
                1,
            )

    def is_valid(self) -> bool:
        """Return `True` if the value was successfully converted to an arrow array."""
        return self.pa_array is not None

    def component_name(self) -> str:
        """Return the component name: the user-supplied name with the component prefix."""
        return COMPONENT_PREFIX + self.name

    def as_arrow_array(self) -> pa.Array | None:
        """Return the converted arrow array, or `None` if this batch is not valid."""
        return self.pa_array


class AnyValues(AsComponents):
    """Helper to log arbitrary values as a bundle of components."""

    def __init__(self, **kwargs: Any) -> None:
        """
        Construct a new AnyValues bundle.

        Each kwarg will be logged as a separate component using the provided data.
         - The key will be used as the name of the component
         - The value must be able to be converted to an array of arrow types. In general, if
           you can pass it to [pyarrow.array](https://arrow.apache.org/docs/python/generated/pyarrow.array.html),
           you can log it as an extension component.

        All values must either have the same length, or be singular in which case they will be
        treated as a splat.

        Note: rerun requires that a given component only take on a single type. The first type logged
        will be the type that is used for all future logs of that component. The API will make
        a best effort to do type conversion if supported by numpy and arrow. Any components that
        can't be converted will be dropped.

        If you want to inspect how your component will be converted to the underlying
        arrow code, the following snippet is roughly what is happening internally:
        ```
        np_value = np.atleast_1d(np.asarray(value))
        pa_value = pa.array(np_value)
        ```

        Example
        -------
        ```
        rr.log(
            "any_values",
            rr.AnyValues(
                foo=[1.2, 3.4, 5.6],
                bar="hello world",
            ),
        )
        ```
        """
        # Only batches that converted successfully are kept, in keyword order.
        self.component_batches: list[AnyBatchValue] = []

        for name, value in kwargs.items():
            batch = AnyBatchValue(name, value)
            if batch.is_valid():
                self.component_batches.append(batch)

    def as_component_batches(self) -> Iterable[ComponentBatchLike]:
        """Return the successfully converted component batches, in keyword order."""
        return self.component_batches
46 changes: 46 additions & 0 deletions rerun_py/tests/unit/test_any_values.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from __future__ import annotations

import numpy as np
import pytest
import rerun as rr
from rerun.error_utils import RerunWarning


def test_any_value() -> None:
    """Plain Python values yield one component batch per keyword, in keyword order."""
    bundle = rr.AnyValues(foo=[1.0, 2.0, 3.0], bar="hello")
    produced = list(bundle.as_component_batches())
    foo_batch, bar_batch = produced[0], produced[1]

    # Component names carry the user-component prefix.
    assert foo_batch.component_name() == "user.components.foo"
    assert bar_batch.component_name() == "user.components.bar"

    # The list keeps its three entries; the lone string becomes a length-1 array.
    assert len(foo_batch.as_arrow_array()) == 3
    assert len(bar_batch.as_arrow_array()) == 1

    expected = np.array([1.0, 2.0, 3.0])
    actual = foo_batch.as_arrow_array().to_numpy()
    assert np.all(actual == expected)


def test_any_value_datatypes() -> None:
    """A Rerun datatype batch can be passed to `AnyValues` directly."""
    points = rr.datatypes.Vec2DBatch([(0, 1), (2, 3), (4, 5)])
    bundle = rr.AnyValues(my_points=points)

    first_batch = list(bundle.as_component_batches())[0]

    assert first_batch.component_name() == "user.components.my_points"
    assert len(first_batch.as_arrow_array()) == 3


def test_bad_any_value() -> None:
    """Unconvertible data is dropped and reported through exactly one `RerunWarning`."""

    class Foo:
        pass

    with pytest.warns(RerunWarning) as caught:
        bundle = rr.AnyValues(bad_data=[Foo()])
        produced = list(bundle.as_component_batches())

        # The failed component is omitted from the bundle entirely.
        assert len(produced) == 0

        assert len(caught) == 1
        message = str(caught[0].message)
        assert "Error converting data to arrow for AnyValues 'bad_data'" in message
Loading