diff --git a/python-spec/src/somacore/experiment.py b/python-spec/src/somacore/experiment.py
index bb0d7c6..b570aa2 100644
--- a/python-spec/src/somacore/experiment.py
+++ b/python-spec/src/somacore/experiment.py
@@ -66,6 +66,17 @@ class Experiment(
     spatial = _mixin.item[_SceneColl]()  # TODO: Discuss the name of this element.
     """A collection of named spatial scenes."""
 
+    obs_spatial_presence = _mixin.item[_DF]()
+    """A dataframe that stores the presence of obs in the spatial scenes.
+
+    This provides a join table for the obs ``soma_joinid`` and the scene names used in
+    the ``spatial`` collection. This dataframe must contain index columns ``soma_joinid``
+    and ``scene_id``. The ``scene_id`` column must have type ``string``. The
+    dataframe must contain a ``boolean`` column ``soma_data``. The values of ``soma_data`` are
+    ``True`` if the obs ``soma_joinid`` is contained in the scene
+    ``scene_id`` and ``False`` otherwise.
+    """
+
     def axis_query(
         self,
         measurement_name: str,
diff --git a/python-spec/src/somacore/measurement.py b/python-spec/src/somacore/measurement.py
index e7d0c9f..273b58f 100644
--- a/python-spec/src/somacore/measurement.py
+++ b/python-spec/src/somacore/measurement.py
@@ -99,3 +99,14 @@ class Measurement(
 
     This is indexed by ``[varid_1, varid_2]``.
     """
+
+    var_spatial_presence = _mixin.item[_DF]()
+    """A dataframe that stores the presence of var in the spatial scenes.
+
+    This provides a join table for the var ``soma_joinid`` and the scene names used in
+    the ``spatial`` collection. This dataframe must contain index columns ``soma_joinid``
+    and ``scene_id``. The ``scene_id`` column must have type ``string``. The
+    dataframe must contain a ``boolean`` column ``data``. The values of ``data`` are
+    ``True`` if the var with varid ``soma_joinid`` is contained in scene with name
+    ``scene_id`` and ``False`` otherwise.
+    """
diff --git a/python-spec/src/somacore/query/query.py b/python-spec/src/somacore/query/query.py
index ac22792..947d8b0 100644
--- a/python-spec/src/somacore/query/query.py
+++ b/python-spec/src/somacore/query/query.py
@@ -21,6 +21,7 @@
 import numpy.typing as npt
 import pandas as pd
 import pyarrow as pa
+import pyarrow.compute as pacomp
 from scipy import sparse
 from typing_extensions import Literal, Protocol, Self, TypedDict
 
@@ -267,6 +268,69 @@ def varm(self, layer: str) -> data.SparseRead:
         """
         return self._axism_inner(_Axis.VAR, layer)
 
+    def obs_scene_ids(self) -> pa.Array:
+        """Returns a pyarrow array with scene ids that contain obs from this
+        query.
+
+        The ids are the unique ``scene_id`` values among the rows of
+        ``self.experiment.obs_spatial_presence`` selected by this query's obs
+        joinids and passing the ``data != 0`` value filter.
+
+        Raises:
+            KeyError: If looking up ``obs_spatial_presence`` raises ``KeyError``.
+            TypeError: If ``obs_spatial_presence`` is not a dataframe.
+
+        Lifecycle: experimental
+        """
+        try:
+            obs_scene = self.experiment.obs_spatial_presence
+        except KeyError as ke:
+            raise KeyError("Missing obs_scene") from ke
+        if not isinstance(obs_scene, data.DataFrame):
+            raise TypeError("obs_scene must be a dataframe.")
+
+        # NOTE(review): the filter reads column ``data``, but the
+        # ``Experiment.obs_spatial_presence`` docstring names the boolean
+        # column ``soma_data`` -- confirm which name is correct.
+        full_table = obs_scene.read(
+            coords=((_Axis.OBS.getattr_from(self._joinids), slice(None))),
+            result_order=options.ResultOrder.COLUMN_MAJOR,
+            value_filter="data != 0",
+        ).concat()
+
+        return pacomp.unique(full_table["scene_id"])
+
+    def var_scene_ids(self) -> pa.Array:
+        """Returns a pyarrow array with scene ids that contain var from this
+        query.
+
+        The ids are the unique ``scene_id`` values among the rows of
+        ``self._ms.var_spatial_presence`` selected by this query's var
+        joinids and passing the ``data != 0`` value filter.
+
+        Raises:
+            KeyError: If looking up ``var_spatial_presence`` raises ``KeyError``.
+            TypeError: If ``var_spatial_presence`` is not a dataframe.
+
+        Lifecycle: experimental
+        """
+        try:
+            var_scene = self._ms.var_spatial_presence
+        except KeyError as ke:
+            raise KeyError("Missing var_scene") from ke
+        if not isinstance(var_scene, data.DataFrame):
+            raise TypeError("var_scene must be a dataframe.")
+
+        # Select with the var joinids (not the obs ones): per its docstring,
+        # ``var_spatial_presence`` is keyed by the var ``soma_joinid``.
+        full_table = var_scene.read(
+            coords=((_Axis.VAR.getattr_from(self._joinids), slice(None))),
+            result_order=options.ResultOrder.COLUMN_MAJOR,
+            value_filter="data != 0",
+        ).concat()
+
+        return pacomp.unique(full_table["scene_id"])
+
     def to_anndata(
         self,
         X_name: str,
@@ -826,6 +890,9 @@ def obs(self) -> data.DataFrame: ...
     @property
     def context(self) -> Optional[base_types.ContextBase]: ...
 
+    @property
+    def obs_spatial_presence(self) -> data.DataFrame: ...
+
 
 class _HasObsVar(Protocol[_T_co]):
     """Something which has an ``obs`` and ``var`` field.