Skip to content

Commit

Permalink
Rework and enhance type hierarchy and generics. (#115)
Browse files Browse the repository at this point in the history
- Pulls basic collection impl into BaseCollection, allowing Collection
  to add the semantics of "no semantics". This mirrors the
  implementation in tiledbsoma.
- Makes Measurement and Experiment inherit from BaseCollection.
  Previous problems with this were due to missing `__slots__`.
- Adds generic parameters to Measurement and Experiment that allow
  implementations to specify the exact types they provide, saving a lot
  of `cast`ing down the road.
- Renames lots of TypeVars to be clearer about their purpose.
- Adds overloads to `add_new_collection` for better type inference.
- Tightens `_Experimentish` to only expect read accessors, not writers.

While these new changes add a bunch of generic slots to the base
collection types and experiments and measurements, the experience from
the perspective of a SOMA library user will be roughly the same. That is
to say, it's a little scary here, but the end user will still see
`theimpl.Collection[ElementType]`. Type inference when using composed
objects is better as well:

    some_exp = theimpl.Experiment.open(...)

    obs = some_exp.obs
    reveal_type(obs)
    # BEFORE: somacore.DataFrame
    #         (i.e., the type system doesn't know what implementation
    #         of the abstract DataFrame this is; it only knows about
    #         the bare minimum DataFrame properties)
    # AFTER:  theimpl.DataFrame

    ms = some_exp.ms
    reveal_type(ms)
    # BEFORE: somacore.Collection[somacore.Measurement]
    # AFTER:  theimpl.Collection[theimpl.Measurement]

    some_meas = ms["whatever"]
    reveal_type(ms)
    # BEFORE: somacore.Measurement
    # AFTER:  theimpl.Measurement

    some_meas.X
    reveal_type(ms)
    # BEFORE: somacore.Collection[somacore.NDArray]
    # AFTER:  theimpl.Collection[theimpl.NDArray]

There is no change at runtime; the actual types of the objects remain
the same, but autocompletion, type checking, and other tooling has a
*much* better idea of what is going on.
  • Loading branch information
thetorpedodog authored Feb 3, 2023
1 parent 81f4eb4 commit 4aab19f
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 65 deletions.
59 changes: 49 additions & 10 deletions python-spec/src/somacore/collection.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
import abc
from typing import Any, MutableMapping, Optional, Sequence, Type, TypeVar
from typing import Any, MutableMapping, Optional, Sequence, Type, TypeVar, overload

import pyarrow as pa
from typing_extensions import Final

from . import base
from . import data
from . import options

_ST = TypeVar("_ST", bound=base.SOMAObject)
"""Generic type variable for any SOMA object."""
_CT = TypeVar("_CT", bound="Collection")
_Elem = TypeVar("_Elem", bound=base.SOMAObject)
"""Element Type for a SOMA collection."""
_CT = TypeVar("_CT", bound="BaseCollection")
"""Any implementation of a Collection."""


class Collection(base.SOMAObject, MutableMapping[str, _ST], metaclass=abc.ABCMeta):
class BaseCollection(
base.SOMAObject, MutableMapping[str, _Elem], metaclass=abc.ABCMeta
):
"""A generic string-keyed collection of :class:`SOMAObject`s.
The generic type specifies what type the Collection may contain. At its
Expand All @@ -22,7 +25,6 @@ class Collection(base.SOMAObject, MutableMapping[str, _ST], metaclass=abc.ABCMet
"""

__slots__ = ()
soma_type = "SOMACollection"

@classmethod
@abc.abstractmethod
Expand All @@ -39,15 +41,45 @@ def create(
"""
raise NotImplementedError()

# Overloads to allow type inference to work when doing:
#
# some_coll.add_new_collection("key") # -> Collection
# and
# some_coll.add_new_collection("key", Experiment) # -> Experiment

@overload
@abc.abstractmethod
def add_new_collection(
self,
key: str,
cls: None = None,
*,
uri: Optional[str] = ...,
platform_config: Optional[options.PlatformConfig] = ...,
) -> "Collection":
...

@overload
@abc.abstractmethod
def add_new_collection(
self,
key: str,
cls: Type[_CT],
*,
uri: Optional[str] = ...,
platform_config: Optional[options.PlatformConfig] = ...,
) -> _CT:
...

@abc.abstractmethod
def add_new_collection(
self,
key: str,
cls: Optional[Type[_CT]] = None,
cls: Optional[Type["BaseCollection"]] = None,
*,
uri: Optional[str] = None,
platform_config: Optional[options.PlatformConfig] = None,
) -> _CT:
) -> "BaseCollection":
"""Creates a new sub-collection of this collection.
To add an existing collection as a sub-element of this collection,
Expand Down Expand Up @@ -139,13 +171,13 @@ def add_new_sparse_ndarray(
"""
raise NotImplementedError()

def __setitem__(self, key: str, value: _ST) -> None:
def __setitem__(self, key: str, value: _Elem) -> None:
"""Sets an entry into this collection. See :meth:`set` for details."""
self.set(key, value)

@abc.abstractmethod
def set(
self, key: str, value: _ST, *, use_relative_uri: Optional[bool] = None
self, key: str, value: _Elem, *, use_relative_uri: Optional[bool] = None
) -> None:
"""Sets an entry of this collection.
Expand All @@ -171,3 +203,10 @@ def set(
If ``False``, will always use an absolute URI.
"""
raise NotImplementedError()


class Collection(BaseCollection[_Elem]):
"""SOMA Collection imposing no semantics on the contained values."""

soma_type: Final = "SOMACollection" # type: ignore[misc]
__slots__ = ()
51 changes: 41 additions & 10 deletions python-spec/src/somacore/ephemeral/collections.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
from typing import Any, Dict, Iterator, NoReturn, Optional, TypeVar
from typing import Any, Dict, Iterator, NoReturn, Optional, Type, TypeVar

from .. import base
from .. import collection
from .. import data
from .. import experiment
from .. import measurement

_ST = TypeVar("_ST", bound=base.SOMAObject)
_Elem = TypeVar("_Elem", bound=base.SOMAObject)
_Self = TypeVar("_Self")


class Collection(collection.Collection[_ST]):
class BaseCollection(collection.BaseCollection[_Elem]):
"""A memory-backed SOMA Collection for ad-hoc collection building.
This Collection implementation exists purely in memory. It can be used to
Expand All @@ -21,12 +23,14 @@ class Collection(collection.Collection[_ST]):
collection has no ``context`` and ``close``ing it does nothing.
"""

def __init__(self, *args: Any, **kwargs: _ST):
__slots__ = ("_entries", "_metadata")

def __init__(self, *args: Any, **kwargs: _Elem):
"""Creates a new Collection.
Arguments and kwargs are provided as in the ``dict`` constructor.
"""
self._entries: Dict[str, _ST] = dict(*args, **kwargs)
self._entries: Dict[str, _Elem] = dict(*args, **kwargs)
self._metadata: Dict[str, Any] = {}

@property
Expand All @@ -45,7 +49,7 @@ def open(cls, *args, **kwargs) -> NoReturn:
)

@classmethod
def create(cls, *args, **kwargs) -> "Collection":
def create(cls: Type[_Self], *args, **kwargs) -> _Self:
del args, kwargs # All unused
# ThisCollection is in-memory only, so just return a new empty one.
return cls()
Expand All @@ -64,12 +68,12 @@ def add_new_collection(self, *args, **kwargs) -> NoReturn:
add_new_dense_ndarray = add_new_collection

def set(
self, key: str, value: _ST, *, use_relative_uri: Optional[bool] = None
self, key: str, value: _Elem, *, use_relative_uri: Optional[bool] = None
) -> None:
del use_relative_uri # Ignored.
self._entries[key] = value

def __getitem__(self, key: str) -> _ST:
def __getitem__(self, key: str) -> _Elem:
return self._entries[key]

def __delitem__(self, key: str) -> None:
Expand All @@ -82,9 +86,36 @@ def __len__(self) -> int:
return len(self._entries)


class Measurement(measurement.Measurement, Collection): # type: ignore[misc]
class Collection(BaseCollection[_Elem], collection.Collection):
"""An in-memory Collection imposing no semantics on the contents."""

__slots__ = ()


_BasicAbstractMeasurement = measurement.Measurement[
data.DataFrame,
collection.Collection[data.NDArray],
collection.Collection[data.DenseNDArray],
collection.Collection[data.SparseNDArray],
base.SOMAObject,
]
"""The loosest possible constraint of the abstract Measurement type."""


class Measurement(BaseCollection[base.SOMAObject], _BasicAbstractMeasurement):
"""An in-memory Collection with Measurement semantics."""

__slots__ = ()

class Experiment(experiment.Experiment, Collection): # type: ignore[misc]

class Experiment(
BaseCollection[base.SOMAObject],
experiment.Experiment[
data.DataFrame,
collection.Collection[_BasicAbstractMeasurement],
base.SOMAObject,
],
):
"""An in-memory Collection with Experiment semantics."""

__slots__ = ()
43 changes: 25 additions & 18 deletions python-spec/src/somacore/experiment.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import MutableMapping, Optional, TypeVar
from typing import Generic, Optional, TypeVar

from typing_extensions import Final

Expand All @@ -10,38 +10,45 @@
from . import query

_Self = TypeVar("_Self", bound="Experiment")
_ST = TypeVar("_ST", bound=base.SOMAObject)
"""A self type."""
_DF = TypeVar("_DF", bound=data.DataFrame)
"""An implementation of a DataFrame."""
_MeasColl = TypeVar("_MeasColl", bound=collection.Collection[measurement.Measurement])
"""An implementation of a collection of Measurements."""
_RootSO = TypeVar("_RootSO", bound=base.SOMAObject)
"""The root SOMA object type of the implementation."""


class Experiment(MutableMapping[str, _ST]):
class Experiment(collection.BaseCollection[_RootSO], Generic[_DF, _MeasColl, _RootSO]):
"""Mixin for Experiment types."""

# This class is implemented as a mixin to be used with SOMA classes:
# This class is implemented as a mixin to be used with SOMA classes.
# For example, a SOMA implementation would look like this:
#
# # in a SOMA implementation:
# class Experiment(somacore.Experiment, ImplsBaseCollection):
# pass
#
# Experiment should always appear *first* in the base class list.
# MutableMapping is listed as the parent type instead of Collection here
# to avoid the interpreter being unable to pick the right base class:
#
# TypeError: multiple bases have instance lay-out conflict
# # This type-ignore comment will always be needed due to limitations
# # of type annotations; it is (currently) expected.
# class Experiment( # type: ignore[type-var]
# ImplBaseCollection[ImplSOMAObject],
# somacore.Experiment[
# ImplDataFrame, # _DF
# ImplMeasurement, # _MeasColl
# ImplSOMAObject, # _RootSO
# ],
# ):
# ...

__slots__ = ()
soma_type: Final = "SOMAExperiment"
soma_type: Final = "SOMAExperiment" # type: ignore[misc]

obs = _mixin.item(data.DataFrame)
obs = _mixin.item[_DF]()
"""Primary observations on the observation axis.
The contents of the ``soma_joinid`` pseudo-column define the observation
index domain, i.e. ``obsid``. All observations for the experiment must be
defined here.
"""

ms = _mixin.item(
collection.Collection[measurement.Measurement] # type: ignore[type-var]
)
ms = _mixin.item[_MeasColl]()
"""A collection of named measurements."""

def axis_query(
Expand Down
63 changes: 41 additions & 22 deletions python-spec/src/somacore/measurement.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Implementations of the composed SOMA data types."""

from typing import MutableMapping, TypeVar
from typing import Generic, TypeVar

from typing_extensions import Final

Expand All @@ -9,61 +9,80 @@
from . import collection
from . import data

_ST = TypeVar("_ST", bound=base.SOMAObject)


class Measurement(MutableMapping[str, _ST]):
_DF = TypeVar("_DF", bound=data.DataFrame)
"""A particular implementation of DataFrame."""
_NDColl = TypeVar("_NDColl", bound=collection.Collection[data.NDArray])
"""A particular implementation of a collection of NDArrays."""
_DenseNDColl = TypeVar("_DenseNDColl", bound=collection.Collection[data.DenseNDArray])
"""A particular implementation of a collection of DenseNDArrays."""
_SparseNDColl = TypeVar(
"_SparseNDColl", bound=collection.Collection[data.SparseNDArray]
)
"""A particular implementation of a collection of SparseNDArrays."""
_RootSO = TypeVar("_RootSO", bound=base.SOMAObject)
"""The root SomaObject type of the implementation."""


class Measurement(
collection.BaseCollection[_RootSO],
Generic[_DF, _NDColl, _DenseNDColl, _SparseNDColl, _RootSO],
):
"""A set of observations defined by a DataFrame, with measurements."""

# This class is implemented as a mixin to be used with SOMA classes:
#
# # in a SOMA implementation:
# class Measurement(somacore.Measurement, ImplBaseCollection):
# pass
#
# Measurement should always appear *first* in the base class list.
# MutableMapping is listed as the parent type instead of Collection here
# to avoid the interpreter being unable to pick the right base class:
# This class is implemented as a mixin to be used with SOMA classes.
# For example, a SOMA implementation would look like this:
#
# TypeError: multiple bases have instance lay-out conflict
# # This type-ignore comment will always be needed due to limitations
# # of type annotations; it is (currently) expected.
# class Measurement( # type: ignore[type-var]
# ImplBaseCollection[ImplSOMAObject],
# somacore.Measurement[
# ImplDataFrame, # _DF
# ImplCollection[ImplNDArray], # _NDColl
# ImplCollection[ImplDenseNDArray], # _DenseNDColl
# ImplCollection[ImplSparseNDArray], # _SparseNDColl
# ImplSOMAObject, # _RootSO
# ],
# ):
# ...

__slots__ = ()
soma_type: Final = "SOMAMeasurement"
soma_type: Final = "SOMAMeasurement" # type: ignore[misc]

var = _mixin.item(data.DataFrame)
var = _mixin.item[_DF]()
"""Primary annotations on the variable axis for vars on this meansurement.
This annotates _columns_ of the ``X`` arrays. The contents of the
``soma_joinid`` pseudo-column define the variable index domain (``varid``)
All variables for this measurement _must_ be defined in this dataframe.
"""

X = _mixin.item(collection.Collection[data.NDArray])
X = _mixin.item[_NDColl]()
"""A collection of matrices containing feature values.
Each matrix is indexed by ``[obsid, varid]``. Sparse and dense 2D arrays may
both be used in any combination in ``X``.
"""

obsm = _mixin.item(collection.Collection[data.DenseNDArray])
obsm = _mixin.item[_DenseNDColl]()
"""Matrices containing annotations of each ``obs`` row.
This has the same shape as ``obs`` and is indexed with ``obsid``.
"""

obsp = _mixin.item(collection.Collection[data.SparseNDArray])
obsp = _mixin.item[_SparseNDColl]()
"""Matrices containg pairwise annotations of each ``obs`` row.
This is indexed by ``[obsid_1, obsid_2]``.
"""

varm = _mixin.item(collection.Collection[data.DenseNDArray])
varm = _mixin.item[_DenseNDColl]()
"""Matrices containing annotations of each ``var`` row.
This has the same shape as ``var`` and is indexed with ``varid``.
"""

varp = _mixin.item(collection.Collection[data.SparseNDArray])
varp = _mixin.item[_SparseNDColl]()
"""Matrices containg pairwise annotations of each ``var`` row.
This is indexed by ``[varid_1, varid_2]``.
Expand Down
Loading

0 comments on commit 4aab19f

Please sign in to comment.