From b53cd2bced083d6b83f3d00bf446a7ca9f18f6ea Mon Sep 17 00:00:00 2001 From: Julia Dark <24235303+jp-dark@users.noreply.github.com> Date: Thu, 26 Sep 2024 13:56:38 -0400 Subject: [PATCH] Add the `Scene` class (#220) * Create the `Scene` class * Add `spatial` collection of scenes to the `Experiment` class * Add Scene to ephemeral collections and tests --------- Co-authored-by: nguyenv --- python-spec/src/somacore/__init__.py | 2 + .../src/somacore/ephemeral/__init__.py | 2 + .../src/somacore/ephemeral/collections.py | 160 ++++++- python-spec/src/somacore/experiment.py | 11 +- python-spec/src/somacore/scene.py | 444 ++++++++++++++++++ python-spec/testing/test_collection.py | 2 + 6 files changed, 618 insertions(+), 3 deletions(-) create mode 100644 python-spec/src/somacore/scene.py diff --git a/python-spec/src/somacore/__init__.py b/python-spec/src/somacore/__init__.py index 4cf00dab..f16b3f8f 100644 --- a/python-spec/src/somacore/__init__.py +++ b/python-spec/src/somacore/__init__.py @@ -37,6 +37,7 @@ from .query import AxisColumnNames from .query import AxisQuery from .query import ExperimentAxisQuery +from .scene import Scene from .spatial import GeometryDataFrame from .spatial import ImageProperties from .spatial import MultiscaleImage @@ -67,6 +68,7 @@ "SpatialRead", "Experiment", "Measurement", + "Scene", "ImageProperties", "MultiscaleImage", "SpatialDataFrame", diff --git a/python-spec/src/somacore/ephemeral/__init__.py b/python-spec/src/somacore/ephemeral/__init__.py index 6ec3fce2..932f63f1 100644 --- a/python-spec/src/somacore/ephemeral/__init__.py +++ b/python-spec/src/somacore/ephemeral/__init__.py @@ -8,9 +8,11 @@ from .collections import Collection from .collections import Experiment from .collections import Measurement +from .collections import Scene __all__ = ( "Collection", "Experiment", "Measurement", + "Scene", ) diff --git a/python-spec/src/somacore/ephemeral/collections.py b/python-spec/src/somacore/ephemeral/collections.py index 
de49cb05..583ed947 100644 --- a/python-spec/src/somacore/ephemeral/collections.py +++ b/python-spec/src/somacore/ephemeral/collections.py @@ -1,13 +1,27 @@ -from typing import Any, Dict, Iterator, NoReturn, Optional, TypeVar - +from typing import ( + Any, + Dict, + Iterator, + NoReturn, + Optional, + Sequence, + Tuple, + TypeVar, + Union, +) + +import pyarrow as pa from typing_extensions import Literal, Self from .. import base from .. import collection +from .. import coordinates from .. import data from .. import experiment from .. import measurement from .. import options +from .. import scene +from .. import spatial _Elem = TypeVar("_Elem", bound=base.SOMAObject) @@ -120,6 +134,14 @@ class Collection( # type: ignore[misc] # __eq__ false positive ] """The loosest possible constraint of the abstract Measurement type.""" +_BasicAbstractScene = scene.Scene[ + spatial.MultiscaleImage, + spatial.PointCloud, + spatial.GeometryDataFrame, + base.SOMAObject, +] +"""The loosest possible constraint of the abstract Scene type.""" + class Measurement( # type: ignore[misc] # __eq__ false positive BaseCollection[base.SOMAObject], _BasicAbstractMeasurement @@ -129,11 +151,145 @@ class Measurement( # type: ignore[misc] # __eq__ false positive __slots__ = () +class Scene( # type: ignore[misc] # __eq__ false positive + BaseCollection[base.SOMAObject], _BasicAbstractScene +): + """An in-memory Collection with Scene semantics.""" + + __slots__ = () + + @property + def coordinate_space(self) -> coordinates.CoordinateSpace: + """Coordinate system for this scene.""" + raise NotImplementedError() + + @coordinate_space.setter + def coordinate_space(self, value: coordinates.CoordinateSpace) -> None: + raise NotImplementedError() + + def add_geometry_dataframe( + self, + key: str, + subcollection: Union[str, Sequence[str]], + transform: Optional[coordinates.CoordinateTransform], + *, + uri: str, + schema: pa.Schema, + index_column_names: Sequence[str] = ( + options.SOMA_JOINID, + 
options.SOMA_GEOMETRY, + ), + axis_names: Sequence[str] = ("x", "y"), + domain: Optional[Sequence[Optional[Tuple[Any, Any]]]] = None, + platform_config: Optional[options.PlatformConfig] = None, + context: Optional[Any] = None, + ) -> spatial.GeometryDataFrame: + raise NotImplementedError() + + def add_multiscale_image( + self, + key: str, + subcollection: Union[str, Sequence[str]], + transform: Optional[coordinates.CoordinateTransform], + *, + uri: str, + type: pa.DataType, + image_type: str = "CYX", # TODO: Replace this arg after PR #219 is merged + reference_level_shape: Sequence[int], + axis_names: Sequence[str] = ("c", "x", "y"), + ) -> spatial.MultiscaleImage: + raise NotImplementedError() + + def add_new_point_cloud( + self, + key: str, + subcollection: Union[str, Sequence[str]], + transform: Optional[coordinates.CoordinateTransform], + *, + uri: Optional[str] = None, + schema: pa.Schema, + index_column_names: Sequence[str] = (options.SOMA_JOINID,), + axis_names: Sequence[str] = ("x", "y"), + domain: Optional[Sequence[Optional[Tuple[Any, Any]]]] = None, + platform_config: Optional[options.PlatformConfig] = None, + ) -> spatial.PointCloud: + raise NotImplementedError() + + def set_transform_to_geometry_dataframe( + self, + key: str, + transform: coordinates.CoordinateTransform, + *, + subcollection: Union[str, Sequence[str]] = "obsl", + coordinate_space: Optional[coordinates.CoordinateSpace] = None, + ) -> spatial.GeometryDataFrame: + raise NotImplementedError() + + def set_transform_to_multiscale_image( + self, + key: str, + transform: coordinates.CoordinateTransform, + *, + subcollection: Union[str, Sequence[str]] = "img", + coordinate_space: Optional[coordinates.CoordinateSpace] = None, + ) -> spatial.MultiscaleImage: + raise NotImplementedError() + + def set_transform_to_point_cloud( + self, + key: str, + transform: coordinates.CoordinateTransform, + *, + subcollection: Union[str, Sequence[str]] = "obsl", + coordinate_space: 
Optional[coordinates.CoordinateSpace] = None, + ) -> spatial.PointCloud: + raise NotImplementedError() + + def get_transform_from_geometry_dataframe( + self, key: str, *, subcollection: Union[str, Sequence[str]] = "obsl" + ) -> coordinates.CoordinateTransform: + raise NotImplementedError() + + def get_transform_from_multiscale_image( + self, + key: str, + *, + subcollection: str = "img", + level: Optional[Union[str, int]] = None, + ) -> coordinates.CoordinateTransform: + raise NotImplementedError() + + def get_transform_from_point_cloud( + self, key: str, *, subcollection: str = "obsl" + ) -> coordinates.CoordinateTransform: + raise NotImplementedError() + + def get_transform_to_geometry_dataframe( + self, key: str, *, subcollection: Union[str, Sequence[str]] = "obsl" + ) -> coordinates.CoordinateTransform: + raise NotImplementedError() + + def get_transform_to_multiscale_image( + self, + key: str, + *, + subcollection: str = "img", + level: Optional[Union[str, int]] = None, + ) -> coordinates.CoordinateTransform: + raise NotImplementedError() + + def get_transform_to_point_cloud( + self, key: str, *, subcollection: str = "obsl" + ) -> coordinates.CoordinateTransform: + raise NotImplementedError() + + class Experiment( # type: ignore[misc] # __eq__ false positive BaseCollection[base.SOMAObject], experiment.Experiment[ data.DataFrame, collection.Collection[_BasicAbstractMeasurement], + collection.Collection[_BasicAbstractScene], base.SOMAObject, ], ): diff --git a/python-spec/src/somacore/experiment.py b/python-spec/src/somacore/experiment.py index 6f97311f..bb0d7c65 100644 --- a/python-spec/src/somacore/experiment.py +++ b/python-spec/src/somacore/experiment.py @@ -8,16 +8,21 @@ from . import data from . import measurement from . import query +from . 
import scene _DF = TypeVar("_DF", bound=data.DataFrame) """An implementation of a DataFrame.""" _MeasColl = TypeVar("_MeasColl", bound=collection.Collection[measurement.Measurement]) """An implementation of a collection of Measurements.""" +_SceneColl = TypeVar("_SceneColl", bound=collection.Collection[scene.Scene]) +"""An implementation of a collection of spatial data.""" _RootSO = TypeVar("_RootSO", bound=base.SOMAObject) """The root SOMA object type of the implementation.""" -class Experiment(collection.BaseCollection[_RootSO], Generic[_DF, _MeasColl, _RootSO]): +class Experiment( + collection.BaseCollection[_RootSO], Generic[_DF, _MeasColl, _SceneColl, _RootSO] +): """A collection subtype representing an annotated 2D matrix of measurements. In single cell biology, this can represent multiple modes of measurement @@ -38,6 +43,7 @@ class Experiment(collection.BaseCollection[_RootSO], Generic[_DF, _MeasColl, _Ro # somacore.Experiment[ # ImplDataFrame, # _DF # ImplMeasurement, # _MeasColl + # ImplScene, # _SceneColl # ImplSOMAObject, # _RootSO # ], # ): @@ -57,6 +63,9 @@ class Experiment(collection.BaseCollection[_RootSO], Generic[_DF, _MeasColl, _Ro ms = _mixin.item[_MeasColl]() """A collection of named measurements.""" + spatial = _mixin.item[_SceneColl]() # TODO: Discuss the name of this element. + """A collection of named spatial scenes.""" + def axis_query( self, measurement_name: str, diff --git a/python-spec/src/somacore/scene.py b/python-spec/src/somacore/scene.py new file mode 100644 index 00000000..4c9f959d --- /dev/null +++ b/python-spec/src/somacore/scene.py @@ -0,0 +1,444 @@ +"""Implementation of the SOMA scene collection for spatial data""" + +import abc +from typing import Any, Generic, Optional, Sequence, Tuple, TypeVar, Union + +import pyarrow as pa +from typing_extensions import Final + +from . import _mixin +from . import base +from . import collection +from . import coordinates +from . import options +from . 
import spatial + +_MultiscaleImage = TypeVar("_MultiscaleImage", bound=spatial.MultiscaleImage) +"""A particular implementation of a multiscale image.""" + +_PointCloud = TypeVar("_PointCloud", bound=spatial.PointCloud) +"""A particular implementation of a point cloud.""" + +_GeometryDataFrame = TypeVar("_GeometryDataFrame", bound=spatial.GeometryDataFrame) +"""A particular implementation of a geometry dataframe.""" + +_RootSO = TypeVar("_RootSO", bound=base.SOMAObject) +"""The root SomaObject type of the implementation.""" + + +class Scene( + collection.BaseCollection[_RootSO], + Generic[_MultiscaleImage, _PointCloud, _GeometryDataFrame, _RootSO], +): + """A collection subtype representing spatial assets that can all be stored + on a single coordinate space. + + Lifecycle: experimental + """ + + # This class is implemented as a mixin to be used with SOMA classes. + # For example, a SOMA implementation would look like this: + # + # # This type-ignore comment will always be needed due to limitations + # # of type annotations; it is (currently) expected. + # class Scene( # type: ignore[type-var] + # ImplBaseCollection[ImplSOMAObject], + # somacore.Scene[ + # ImplMultiscaleImage, + # ImplPointCloud, + # ImplGeometryDataFrame, + # ImplSOMAObject, + # ], + # ): + # ... + + __slots__ = () + soma_type: Final = "SOMAScene" # type: ignore[misc] + + img = _mixin.item[collection.Collection[_MultiscaleImage]]() + """A collection of multiscale images backing the spatial data. + + Lifecycle: experimental + """ + + obsl = _mixin.item[collection.Collection[Union[_PointCloud, _GeometryDataFrame]]]() + """A collection of observation location data. + + This collection exists to store any spatial data in the scene that joins on the obs + ``soma_joinid``. Each dataframe in ``obsl`` can be either a PointCloud + or a GeometryDataFrame. 
+ + Lifecycle: experimental + """ + + varl = _mixin.item[ + collection.Collection[ + collection.Collection[Union[_PointCloud, _GeometryDataFrame]] + ] + ]() + """A collection of collections of variable location data. + + This collection exists to store any spatial data in the scene that joins on the + variable ``soma_joinid`` for the measurements in the SOMA experiment. The top-level + collection maps from measurement name to a collection of dataframes. + + Each dataframe in a ``varl`` subcollection can be either a GeometryDataFrame or a + PointCloud. + + Lifecycle: experimental + """ + + @property + @abc.abstractmethod + def coordinate_space(self) -> Optional[coordinates.CoordinateSpace]: + """Coordinate system for this scene. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @coordinate_space.setter + @abc.abstractmethod + def coordinate_space(self, value: coordinates.CoordinateSpace) -> None: + raise NotImplementedError() + + @abc.abstractmethod + def add_geometry_dataframe( + self, + key: str, + subcollection: Union[str, Sequence[str]], + transform: Optional[coordinates.CoordinateTransform], + *, + uri: str, + schema: pa.Schema, + index_column_names: Sequence[str] = ( + options.SOMA_JOINID, + options.SOMA_GEOMETRY, + ), + axis_names: Sequence[str] = ("x", "y"), + domain: Optional[Sequence[Optional[Tuple[Any, Any]]]] = None, + platform_config: Optional[options.PlatformConfig] = None, + context: Optional[Any] = None, + ) -> _GeometryDataFrame: + """Adds a ``GeometryDataFrame`` to the scene and sets a coordinate transform + between the scene and the dataframe. + + If the subcollection the geometry dataframe is inside of is more than one + layer deep, the input should be provided as a sequence of names. 
For example, + to add a geometry dataframe named "transcripts" to + the "var/RNA" collection:: + + scene.add_geometry_dataframe( + 'transcripts', subcollection=['var', 'RNA'], **kwargs + ) + + Args: + key: The name of the geometry dataframe. + transform: The coordinate transformation from the scene to the dataframe. + subcollection: The name, or sequence of names, of the subcollection the + dataframe is stored in. + + Returns: + The newly created ``GeometryDataFrame``, opened for writing. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def add_multiscale_image( + self, + key: str, + subcollection: Union[str, Sequence[str]], + transform: Optional[coordinates.CoordinateTransform], + *, + uri: str, + type: pa.DataType, + image_type: str = "CYX", # TODO: Replace this arg after PR #219 is merged + reference_level_shape: Sequence[int], + axis_names: Sequence[str] = ("c", "x", "y"), + ) -> _MultiscaleImage: + """Adds a ``MultiscaleImage`` to the scene and sets a coordinate transform + between the scene and the image. + + Parameters are as in :meth:`spatial.MultiscaleImage.create`. + See :meth:`add_new_collection` for details about child URIs. + + Args: + key: The name of the multiscale image. + transform: The coordinate transformation from the scene to the image. + subcollection: The name, or sequence of names, of the subcollection the + image is stored in. + + Returns: + The newly created ``MultiscaleImage``, opened for writing. 
+ + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def add_new_point_cloud( + self, + key: str, + subcollection: Union[str, Sequence[str]], + transform: Optional[coordinates.CoordinateTransform], + *, + uri: Optional[str] = None, + schema: pa.Schema, + index_column_names: Sequence[str] = (options.SOMA_JOINID,), + axis_names: Sequence[str] = ("x", "y"), + domain: Optional[Sequence[Optional[Tuple[Any, Any]]]] = None, + platform_config: Optional[options.PlatformConfig] = None, + ) -> _PointCloud: + """Adds a point cloud to the scene and sets a coordinate transform + between the scene and the point cloud. + + Parameters are as in :meth:`spatial.PointCloud.create`. + See :meth:`add_new_collection` for details about child URIs. + + Args: + key: The name of the point cloud. + transform: The coordinate transformation from the scene to the point cloud. + subcollection: The name, or sequence of names, of the subcollection the + point cloud is stored in. + + Returns: + The newly created ``PointCloud``, opened for writing. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def set_transform_to_geometry_dataframe( + self, + key: str, + transform: coordinates.CoordinateTransform, + *, + subcollection: Union[str, Sequence[str]] = "obsl", + coordinate_space: Optional[coordinates.CoordinateSpace] = None, + ) -> _GeometryDataFrame: + """Adds the coordinate transform for the scene coordinate space to + a geometry dataframe stored in the scene. + + If the subcollection the geometry dataframe is inside of is more than one + layer deep, the input should be provided as a sequence of names. For example, + to set a transformation for a geometry dataframe named "transcripts" in the + "var/RNA" collection:: + + scene.set_transform_to_geometry_dataframe( + 'transcripts', transform, subcollection=['var', 'RNA'], + ) + + Args: + key: The name of the geometry dataframe. 
+ transform: The coordinate transformation from the scene to the dataframe. + subcollection: The name, or sequence of names, of the subcollection the + dataframe is stored in. Defaults to ``'obsl'``. + coordinate_space: Optional coordinate space for the dataframe. This will + replace the existing coordinate space of the dataframe. + + Returns: + The geometry dataframe, opened for writing. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def set_transform_to_multiscale_image( + self, + key: str, + transform: coordinates.CoordinateTransform, + *, + subcollection: Union[str, Sequence[str]] = "img", + coordinate_space: Optional[coordinates.CoordinateSpace] = None, + ) -> _MultiscaleImage: + """Adds the coordinate transform for the scene coordinate space to + a multiscale image stored in the scene. + + The transform to the multiscale image must be to the coordinate space + defined on the reference level for the image. In most cases, this will be + the level ``0`` image. + + Args: + key: The name of the multiscale image. + transform: The coordinate transformation from the scene to the reference + level of the multiscale image. + subcollection: The name, or sequence of names, of the subcollection the + image is stored in. Defaults to ``'img'``. + coordinate_space: Optional coordinate space for the image. This will + replace the existing coordinate space of the multiscale image. + + Returns: + The multiscale image, opened for writing. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def set_transform_to_point_cloud( + self, + key: str, + transform: coordinates.CoordinateTransform, + *, + subcollection: Union[str, Sequence[str]] = "obsl", + coordinate_space: Optional[coordinates.CoordinateSpace] = None, + ) -> _PointCloud: + """Adds the coordinate transform for the scene coordinate space to + a point cloud stored in the scene. 
+ + If the subcollection the point cloud is inside of is more than one + layer deep, the input should be provided as a sequence of names. For example, + to set a transform for a point cloud named `transcripts` in the `var/RNA` + collection:: + + scene.set_transform_to_point_cloud( + 'transcripts', transform, subcollection=['var', 'RNA'], + ) + + Args: + key: The name of the point cloud. + transform: The coordinate transformation from the scene to the point cloud. + subcollection: The name, or sequence of names, of the subcollection the + point cloud is stored in. Defaults to ``'obsl'``. + coordinate_space: Optional coordinate space for the point cloud. This will + replace the existing coordinate space of the point cloud. Defaults to + ``None``. + + Returns: + The point cloud, opened for writing. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_transform_from_geometry_dataframe( + self, key: str, *, subcollection: Union[str, Sequence[str]] = "obsl" + ) -> coordinates.CoordinateTransform: + """Returns the coordinate transformation from the requested geometry dataframe + to the scene. + + Args: + key: The name of the geometry dataframe. + subcollection: The name, or sequence of names, of the subcollection the + dataframe is stored in. Defaults to ``'obsl'``. + + Returns: + Coordinate transform from the dataframe to the scene. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_transform_from_multiscale_image( + self, + key: str, + *, + subcollection: str = "img", + level: Optional[Union[str, int]] = None, + ) -> coordinates.CoordinateTransform: + """Returns the coordinate transformation from the requested multiscale image to + the scene. + + Args: + key: The name of the multiscale image. + subcollection: The name, or sequence of names, of the subcollection the + image is stored in. Defaults to ``'img'``. + level: The level of the image to get the transformation from. 
+ Defaults to ``None`` -- the transformation will be from the reference + level. + + Returns: + Coordinate transform from the multiscale image to the scene. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_transform_from_point_cloud( + self, key: str, *, subcollection: str = "obsl" + ) -> coordinates.CoordinateTransform: + """Returns the coordinate transformation from the requested point cloud to + the scene. + + Args: + key: The name of the point cloud. + subcollection: The name, or sequence of names, of the subcollection the + point cloud is stored in. Defaults to ``'obsl'``. + + Returns: + Coordinate transform from the point cloud to the scene. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_transform_to_geometry_dataframe( + self, key: str, *, subcollection: Union[str, Sequence[str]] = "obsl" + ) -> coordinates.CoordinateTransform: + """Returns the coordinate transformation from the scene to a requested + geometry dataframe. + + Args: + key: The name of the geometry dataframe. + subcollection: The name, or sequence of names, of the subcollection the + dataframe is stored in. Defaults to ``'obsl'``. + + Returns: + Coordinate transform from the scene to the requested dataframe. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_transform_to_multiscale_image( + self, + key: str, + *, + subcollection: str = "img", + level: Optional[Union[str, int]] = None, + ) -> coordinates.CoordinateTransform: + """Returns the coordinate transformation from the scene to a requested + multiscale image. + + Args: + key: The name of the multiscale image. + subcollection: The name, or sequence of names, of the subcollection the + image is stored in. Defaults to ``'img'``. + level: The level of the image to get the transformation to. + Defaults to ``None`` -- the transformation will be to the reference + level. 
+ + Returns: + Coordinate transform from the scene to the requested multiscale image. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_transform_to_point_cloud( + self, key: str, *, subcollection: str = "obsl" + ) -> coordinates.CoordinateTransform: + """Returns the coordinate transformation from the scene to a requested + point cloud. + + Args: + key: The name of the point cloud. + subcollection: The name, or sequence of names, of the subcollection the + point cloud is stored in. Defaults to ``'obsl'``. + + Returns: + Coordinate transform from the scene to the requested point cloud. + + Lifecycle: experimental + """ + raise NotImplementedError() diff --git a/python-spec/testing/test_collection.py b/python-spec/testing/test_collection.py index e0b8569b..e997c6d2 100644 --- a/python-spec/testing/test_collection.py +++ b/python-spec/testing/test_collection.py @@ -40,3 +40,5 @@ def test_method_resolution_order(self): self.assertEqual("SOMAMeasurement", m.soma_type) exp = ephemeral.Experiment() self.assertEqual("SOMAExperiment", exp.soma_type) + scene = ephemeral.Scene() + self.assertEqual("SOMAScene", scene.soma_type)