Refactor rerun.AnyValues to handle None input more gracefully #3725

Merged 9 commits · Oct 9, 2023
1 change: 1 addition & 0 deletions rerun_py/mkdocs.yml
@@ -24,6 +24,7 @@ plugins:
python:
paths: ["rerun_sdk"] # Lookup python modules relative to this path
import: # Cross-references for python and numpy
- https://arrow.apache.org/docs/objects.inv
- https://docs.python.org/3/objects.inv
- https://numpy.org/doc/stable/objects.inv
- https://ipython.readthedocs.io/en/stable/objects.inv
4 changes: 2 additions & 2 deletions rerun_py/rerun_sdk/rerun/_log.py
@@ -7,7 +7,7 @@

from . import components as cmp
from ._baseclasses import AsComponents, ComponentBatchLike
from .error_utils import _send_warning, catch_and_log_exceptions
from .error_utils import _send_warning_or_raise, catch_and_log_exceptions
from .recording_stream import RecordingStream

__all__ = ["log", "IndicatorComponentBatch", "AsComponents"]
@@ -207,7 +207,7 @@ def log_components(

# Skip components which were logged multiple times.
if name in added:
_send_warning(
_send_warning_or_raise(
f"Component {name} was included multiple times. Only the first instance will be used.",
depth_to_user_code=1,
)
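As a side note on the `_send_warning` → `_send_warning_or_raise` rename: the duplicate-component check above now either warns or raises depending on strict mode. A minimal, self-contained sketch of that skip-or-warn pattern (the function name and the `strict` flag are illustrative stand-ins, not the SDK's internals):

```python
def dedupe_components(batches: list[tuple[str, object]], strict: bool = False) -> dict[str, object]:
    """Keep the first batch per component name; warn or raise on duplicates."""
    added: dict[str, object] = {}
    for name, batch in batches:
        if name in added:
            msg = f"Component {name} was included multiple times. Only the first instance will be used."
            if strict:
                raise ValueError(msg)  # what _send_warning_or_raise amounts to in strict mode
            print(f"warning: {msg}")   # otherwise just a logged warning
            continue
        added[name] = batch
    return added
```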
104 changes: 64 additions & 40 deletions rerun_py/rerun_sdk/rerun/any_value.py
@@ -6,7 +6,7 @@
import pyarrow as pa

from ._log import AsComponents, ComponentBatchLike
from .error_utils import _send_warning
from .error_utils import catch_and_log_exceptions

ANY_VALUE_TYPE_REGISTRY: dict[str, Any] = {}

@@ -21,7 +21,7 @@ class AnyBatchValue(ComponentBatchLike):
See also [rerun.AnyValues][].
"""

def __init__(self, name: str, value: Any) -> None:
def __init__(self, name: str, value: Any, drop_untyped_nones: bool = True) -> None:
"""
Construct a new AnyBatchValue.

@@ -30,7 +30,7 @@ def __init__(self, name: str, value: Any) -> None:
this function so it's possible to pass them directly to AnyValues.

If the object doesn't implement `as_arrow_array()`, it will be passed as an argument
to [pyarrow.array](https://arrow.apache.org/docs/python/generated/pyarrow.array.html).
to [pyarrow.array][].

Note: rerun requires that a given component only take on a single type.
The first type logged will be the type that is used for all future logs
@@ -53,14 +53,19 @@ def __init__(self, name: str, value: Any) -> None:
The name of the component.
value:
The data to be logged as a component.
drop_untyped_nones:
If True, any components that are None will be dropped unless they have been
previously logged with a type.
"""
np_type, pa_type = ANY_VALUE_TYPE_REGISTRY.get(name, (None, None))

self.name = name
self.pa_array = None

try:
if hasattr(value, "as_arrow_array"):
with catch_and_log_exceptions(f"Converting data for '{name}'"):
if isinstance(value, pa.Array):
self.pa_array = value
elif hasattr(value, "as_arrow_array"):
self.pa_array = value.as_arrow_array()
else:
if np_type is not None:
@@ -70,18 +75,13 @@ def __init__(self, name: str, value: Any) -> None:
self.pa_array = pa.array(np_value, type=pa_type)
else:
if value is None:
_send_warning(f"AnyValues '{name}' of unknown type has no data. Ignoring.", 1)
if not drop_untyped_nones:
raise ValueError("Cannot convert None to arrow array. Type is unknown.")
else:
np_value = np.atleast_1d(np.array(value, copy=False))
self.pa_array = pa.array(np_value)
ANY_VALUE_TYPE_REGISTRY[name] = (np_value.dtype, self.pa_array.type)

except Exception as ex:
_send_warning(
f"Error converting data to arrow for AnyValues '{name}'. Ignoring.\n{type(ex).__name__}: {ex}",
1,
)

def is_valid(self) -> bool:
return self.pa_array is not None

@@ -93,53 +93,77 @@ def as_arrow_array(self) -> pa.Array | None:


class AnyValues(AsComponents):
"""Helper to log arbitrary values as a bundle of components."""
"""
Helper to log arbitrary values as a bundle of components.

Example
-------
```python
rr.log(
"any_values", rr.AnyValues(
foo=[1.2, 3.4, 5.6], bar="hello world",
),
)
```
"""

def __init__(self, **kwargs: Any) -> None:
def __init__(self, drop_untyped_nones: bool = True, **kwargs: Any) -> None:
"""
Construct a new AnyValues bundle.

Each kwarg will be logged as a separate component using the provided data.
- The key will be used as the name of the component
- The value must be able to be converted to an array of arrow types. In general, if
you can pass it to [pyarrow.array](https://arrow.apache.org/docs/python/generated/pyarrow.array.html),
you can log it as a extension component.
- The value must be able to be converted to an array of arrow types. In
general, if you can pass it to [pyarrow.array][], you can log it as an
extension component.

All values must either have the same length, or be singular in which
case they will be treated as a splat.

Note: rerun requires that a given component only take on a single type.
The first type logged will be the type that is used for all future logs
of that component. The API will make a best effort to do type conversion
if supported by numpy and arrow. Any components that can't be converted
will result in a warning (or an exception in strict mode).

All values must either have the same length, or be singular in which case they will be
treated as a splat.
`None` values provide a particular challenge as they have no type
information until after the component has been logged with a particular
type. By default, these values are dropped. This should generally be
fine as logging `None` to clear the value before it has been logged is
meaningless unless you are logging out-of-order data. In such cases,
consider introducing your own typed component via
[rerun.ComponentBatchLike][].

Note: rerun requires that a given component only take on a single type. The first type logged
will be the type that is used for all future logs of that component. The API will make
a best effort to do type conversion if supported by numpy and arrow. Any components that
can't be converted will be dropped.
You can change this behavior by setting `drop_untyped_nones` to `False`,
but be aware that this will result in potential warnings (or exceptions
in strict mode).

If you are want to inspect how your component will be converted to the underlying
arrow code, the following snippet is what is happening internally:
If you want to inspect how your component will be converted to the
underlying arrow code, the following snippet is what is happening
internally:
```
np_value = np.atleast_1d(np.array(value, copy=False))
pa_value = pa.array(np_value)
```

Example
-------
```
rr.log(
"any_values",
rr.AnyValues(
foo=[1.2, 3.4, 5.6],
bar="hello world",
),
)
```
Parameters
----------
drop_untyped_nones:
If True, any components that are None will be dropped unless they
have been previously logged with a type.
kwargs:
The components to be logged.

"""
global ANY_VALUE_TYPE_REGISTRY

self.component_batches = []

for name, value in kwargs.items():
batch = AnyBatchValue(name, value)
if batch.is_valid():
self.component_batches.append(batch)
with catch_and_log_exceptions(self.__class__.__name__):
for name, value in kwargs.items():
batch = AnyBatchValue(name, value, drop_untyped_nones=drop_untyped_nones)
if batch.is_valid():
self.component_batches.append(batch)

def as_component_batches(self) -> Iterable[ComponentBatchLike]:
return self.component_batches
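For context on the `drop_untyped_nones` behavior documented above, a minimal usage sketch (entity paths and values are illustrative; assumes an initialized SDK):

```python
import rerun as rr

rr.init("any_values_none_demo", spawn=False)

# An untyped None is silently dropped by default (drop_untyped_nones=True).
rr.log("sensor", rr.AnyValues(confidence=None, label="start"))

# After "confidence" has been logged with a concrete type, the registered
# type is available for later values of that component.
rr.log("sensor", rr.AnyValues(confidence=[0.3, 0.7], label="running"))

# Opting out of the default turns an untyped None into a warning
# (or an exception in strict mode).
rr.log("sensor", rr.AnyValues(drop_untyped_nones=False, brand_new_field=None))
```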
4 changes: 2 additions & 2 deletions rerun_py/rerun_sdk/rerun/archetypes/bar_chart_ext.py
@@ -2,7 +2,7 @@

from typing import TYPE_CHECKING

from ..error_utils import _send_warning, catch_and_log_exceptions
from ..error_utils import _send_warning_or_raise, catch_and_log_exceptions

if TYPE_CHECKING:
from ..components import TensorDataBatch
@@ -24,7 +24,7 @@ def values__field_converter_override(data: TensorDataArrayLike) -> TensorDataBat
shape_dims = tensor_data.as_arrow_array()[0].value["shape"].values.field(0).to_numpy()

if len(shape_dims) != 1:
_send_warning(
_send_warning_or_raise(
f"Bar chart data should only be 1D. Got values with shape: {shape_dims}",
2,
recording=None,
8 changes: 4 additions & 4 deletions rerun_py/rerun_sdk/rerun/archetypes/boxes3d_ext.py
@@ -5,7 +5,7 @@
import numpy as np

from .. import components, datatypes
from ..error_utils import _send_warning, catch_and_log_exceptions
from ..error_utils import _send_warning_or_raise, catch_and_log_exceptions


class Boxes3DExt:
@@ -59,18 +59,18 @@ def __init__(
with catch_and_log_exceptions(context=self.__class__.__name__):
if sizes is not None:
if half_sizes is not None:
_send_warning("Cannot specify both `sizes` and `half_sizes` at the same time.", 1)
_send_warning_or_raise("Cannot specify both `sizes` and `half_sizes` at the same time.", 1)

sizes = np.asarray(sizes, dtype=np.float32)
half_sizes = sizes / 2.0

if mins is not None:
if centers is not None:
_send_warning("Cannot specify both `mins` and `centers` at the same time.", 1)
_send_warning_or_raise("Cannot specify both `mins` and `centers` at the same time.", 1)

# already converted `sizes` to `half_sizes`
if half_sizes is None:
_send_warning("Cannot specify `mins` without `sizes` or `half_sizes`.", 1)
_send_warning_or_raise("Cannot specify `mins` without `sizes` or `half_sizes`.", 1)
half_sizes = np.asarray([1, 1, 1], dtype=np.float32)

mins = np.asarray(mins, dtype=np.float32)
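The hunk above derives `half_sizes` from `sizes`; the truncated continuation presumably derives `centers` from `mins` in the same way. A standalone sketch of that arithmetic (values are illustrative):

```python
import numpy as np

# `sizes` are full extents, `half_sizes` are half of that; a min corner plus
# the half size gives the box center.
sizes = np.asarray([2.0, 4.0, 6.0], dtype=np.float32)
half_sizes = sizes / 2.0          # -> [1.0, 2.0, 3.0]

mins = np.asarray([0.0, 0.0, 0.0], dtype=np.float32)
centers = mins + half_sizes       # -> [1.0, 2.0, 3.0]
```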
4 changes: 2 additions & 2 deletions rerun_py/rerun_sdk/rerun/archetypes/depth_image_ext.py
@@ -6,7 +6,7 @@
import pyarrow as pa

from .._validators import find_non_empty_dim_indices
from ..error_utils import _send_warning, catch_and_log_exceptions
from ..error_utils import _send_warning_or_raise, catch_and_log_exceptions

if TYPE_CHECKING:
from ..components import TensorDataBatch
@@ -36,7 +36,7 @@ def data__field_converter_override(data: TensorDataArrayLike) -> TensorDataBatch

# TODO(#3239): What `recording` should we be passing here? How should we be getting it?
if num_non_empty_dims != 2:
_send_warning(f"Expected depth image, got array of shape {shape_dims}", 1, recording=None)
_send_warning_or_raise(f"Expected depth image, got array of shape {shape_dims}", 1, recording=None)

# IF no labels are set, add them
# TODO(jleibs): Again, needing to do this at the arrow level is awful
8 changes: 4 additions & 4 deletions rerun_py/rerun_sdk/rerun/archetypes/image_ext.py
@@ -8,7 +8,7 @@

from .._validators import find_non_empty_dim_indices
from ..datatypes import TensorBufferType
from ..error_utils import _send_warning, catch_and_log_exceptions
from ..error_utils import _send_warning_or_raise, catch_and_log_exceptions

if TYPE_CHECKING:
from .._image import ImageEncoded
@@ -48,7 +48,7 @@ def compress(self, *, jpeg_quality: int = 95) -> ImageEncoded | Image:
tensor_data_arrow = self.data.as_arrow_array()

if tensor_data_arrow[0].value["buffer"].type_code == self.JPEG_TYPE_ID:
_send_warning(
_send_warning_or_raise(
"Image is already compressed as JPEG. Ignoring compression request.",
1,
recording=None,
@@ -104,12 +104,12 @@ def data__field_converter_override(data: TensorDataArrayLike) -> TensorDataBatch

# TODO(#3239): What `recording` should we be passing here? How should we be getting it?
if num_non_empty_dims < 2 or 3 < num_non_empty_dims:
_send_warning(f"Expected image, got array of shape {shape_dims}", 1, recording=None)
_send_warning_or_raise(f"Expected image, got array of shape {shape_dims}", 1, recording=None)

if num_non_empty_dims == 3:
depth = shape_dims[non_empty_dims[-1]]
if depth not in (3, 4):
_send_warning(
_send_warning_or_raise(
f"Expected image 3 (RGB) or 4 (RGBA). Instead got array of shape {shape_dims}",
1,
recording=None,
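A standalone, simplified sketch of the image shape check this hunk touches (a naive stand-in for `find_non_empty_dim_indices`, and a plain exception where the SDK would warn or raise):

```python
import numpy as np

def check_image_shape(array: np.ndarray) -> None:
    """Accept 2 non-empty dims (grayscale) or 3 with a channel depth of 3/4."""
    shape_dims = array.shape
    non_empty_dims = [i for i, d in enumerate(shape_dims) if d > 1]
    num_non_empty_dims = len(non_empty_dims)

    if num_non_empty_dims < 2 or 3 < num_non_empty_dims:
        raise ValueError(f"Expected image, got array of shape {shape_dims}")

    if num_non_empty_dims == 3:
        depth = shape_dims[non_empty_dims[-1]]
        if depth not in (3, 4):
            raise ValueError(f"Expected a channel depth of 3 (RGB) or 4 (RGBA), got shape {shape_dims}")
```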
14 changes: 7 additions & 7 deletions rerun_py/rerun_sdk/rerun/archetypes/pinhole_ext.py
@@ -7,7 +7,7 @@
from ..components import ViewCoordinatesLike
from ..datatypes.mat3x3 import Mat3x3Like
from ..datatypes.vec2d import Vec2D, Vec2DLike
from ..error_utils import _send_warning, catch_and_log_exceptions
from ..error_utils import _send_warning_or_raise, catch_and_log_exceptions


class PinholeExt:
@@ -84,7 +84,7 @@ def __init__(
if resolution is None and width is not None and height is not None:
resolution = [width, height]
elif resolution is not None and (width is not None or height is not None):
_send_warning("Can't set both resolution and width/height", 1)
_send_warning_or_raise("Can't set both resolution and width/height", 1)

# TODO(andreas): Use a union type for the Pinhole component instead of converting to a matrix here
if image_from_camera is None:
Expand All @@ -96,7 +96,7 @@ def __init__(
height = cast(float, resolution.xy[1])

if focal_length is None:
_send_warning("either image_from_camera or focal_length must be set", 1)
_send_warning_or_raise("either image_from_camera or focal_length must be set", 1)
focal_length = (width * height) ** 0.5 # a reasonable default
if principal_point is None:
principal_point = [width / 2, height / 2]
@@ -109,24 +109,24 @@ def __init__(
fl_x = focal_length[0] # type: ignore[index]
fl_y = focal_length[1] # type: ignore[index]
except Exception:
_send_warning("Expected focal_length to be one or two floats", 1)
_send_warning_or_raise("Expected focal_length to be one or two floats", 1)
fl_x = width / 2
fl_y = fl_x

try:
u_cen = principal_point[0] # type: ignore[index]
v_cen = principal_point[1] # type: ignore[index]
except Exception:
_send_warning("Expected principal_point to be one or two floats", 1)
_send_warning_or_raise("Expected principal_point to be one or two floats", 1)
u_cen = width / 2
v_cen = height / 2

image_from_camera = [[fl_x, 0, u_cen], [0, fl_y, v_cen], [0, 0, 1]] # type: ignore[assignment]
else:
if focal_length is not None:
_send_warning("Both image_from_camera and focal_length set", 1)
_send_warning_or_raise("Both image_from_camera and focal_length set", 1)
if principal_point is not None:
_send_warning("Both image_from_camera and principal_point set", 1)
_send_warning_or_raise("Both image_from_camera and principal_point set", 1)

self.__attrs_init__(image_from_camera=image_from_camera, resolution=resolution, camera_xyz=camera_xyz)
return
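A standalone sketch of the default-intrinsics construction in the hunk above (the resolution is illustrative):

```python
# A single focal length is splatted to fl_x/fl_y and the principal point
# defaults to the image center, yielding a 3x3 pinhole matrix.
width, height = 640.0, 480.0
focal_length = (width * height) ** 0.5      # the "reasonable default" used above
fl_x = fl_y = focal_length
u_cen, v_cen = width / 2, height / 2

image_from_camera = [
    [fl_x, 0.0, u_cen],
    [0.0, fl_y, v_cen],
    [0.0, 0.0, 1.0],
]
```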
4 changes: 2 additions & 2 deletions rerun_py/rerun_sdk/rerun/archetypes/segmentation_image_ext.py
@@ -8,7 +8,7 @@
from .._validators import find_non_empty_dim_indices
from ..datatypes import TensorBufferType
from ..datatypes.tensor_data_ext import _build_buffer_array
from ..error_utils import _send_warning, catch_and_log_exceptions
from ..error_utils import _send_warning_or_raise, catch_and_log_exceptions

if TYPE_CHECKING:
from ..components import TensorDataBatch
@@ -43,7 +43,7 @@ def data__field_converter_override(data: TensorDataArrayLike) -> TensorDataBatch

# TODO(#3239): What `recording` should we be passing here? How should we be getting it?
if num_non_empty_dims != 2:
_send_warning(f"Expected segmentation image, got array of shape {shape_dims}", 1, recording=None)
_send_warning_or_raise(f"Expected segmentation image, got array of shape {shape_dims}", 1, recording=None)

tensor_data_type = TensorDataType().storage_type
shape_data_type = TensorDimensionType().storage_type