Skip to content

Commit

Permalink
Make () the default value for AxisQuery.coords. (#136)
Browse files Browse the repository at this point in the history
Since `coords` values are no longer `Optional` (and the `(slice(None),)`
workaround is no longer needed), this updates the declared API for
AxisQuery so that `coords` is no longer `Optional` either (i.e., to
request everything, pass `()`).
  • Loading branch information
thetorpedodog authored Feb 21, 2023
1 parent 9fc37ab commit 6b414d7
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 15 deletions.
4 changes: 3 additions & 1 deletion python-spec/src/somacore/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,9 @@ class ResultOrder(enum.Enum):
DenseNDCoords = Sequence[DenseCoord]
"""A sequence of ranges to read dense data."""

# TODO: Add support for types other than int/string
# TODO: Add support for non-integer types.
# NOTE: Keep this in sync with the types accepted in `_canonicalize_coord`
# in ./query/axis.py.
# https://github.com/single-cell-data/TileDB-SOMA/issues/960
SparseDFCoord = Union[
DenseCoord,
Expand Down
28 changes: 14 additions & 14 deletions python-spec/src/somacore/query/axis.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,31 @@


def _canonicalize_coords(
in_coords: Optional[options.SparseDFCoords],
in_coords: options.SparseDFCoords,
) -> Tuple[options.SparseDFCoord, ...]:
"""Validates coordinates and freezes sequences as tuples.
This is not strictly necessary; DataFrame will report these errors
eventually but doing it now makes for better UX.
"""
if in_coords is None:
return (slice(None),)
if not isinstance(in_coords, Sequence):
if types.is_nonstringy_sequence(in_coords):
return tuple(_canonicalize_coord(c) for c in in_coords)
if isinstance(in_coords, (str, bytes)):
raise TypeError(
f"query coordinates must be a sequence, not a single {type(in_coords)}"
"Query coordinates must be a normal sequence, not `str` or `bytes`."
f" Did you mean {(in_coords,)}?"
)
if not types.is_nonstringy_sequence(in_coords):
raise TypeError(
"query coordinates must be a normal sequence, not `str` or `bytes`."
)
return tuple(_canonicalize_coord(c) for c in in_coords)
raise TypeError(
f"Query coordinates must be a sequence, not a single {type(in_coords)}"
)


def _canonicalize_coord(coord: options.SparseDFCoord) -> options.SparseDFCoord:
"""Validates a single coordinate, freezing mutable sequences."""
# NOTE: Keep this in sync with the `SparseDFCoord` type.
if coord is None or isinstance(
coord, (bytes, int, slice, str, pa.Array, pa.ChunkedArray, np.ndarray)
coord,
(bytes, int, slice, str, pa.Array, pa.ChunkedArray, np.ndarray),
):
return coord
if isinstance(coord, Sequence):
Expand All @@ -59,13 +60,12 @@ class AxisQuery:
Examples::
AxisQuery() # all data
AxisQuery(coords=None) # also all data
AxisQuery(coords=()) # also all data
AxisQuery(coords=(slice(1,10),)) # 1D, slice
AxisQuery(coords=([0,1,2],)) # 1D, point indexing using array-like
AxisQuery(coords=(slice(None), numpy.array([0,88,1001]))) # 2D
AxisQuery(value_filter="tissue == 'lung'")
AxisQuery(coords=(slice(1,None),), value_filter="tissue == 'lung'")
```
"""

value_filter: Optional[str] = attrs.field(
Expand All @@ -74,7 +74,7 @@ class AxisQuery:
)
"""A string specifying a SOMA ``value_filter``."""
coords: Tuple[options.SparseDFCoord, ...] = attrs.field(
default=(slice(None),),
default=(),
converter=_canonicalize_coords,
)
"""Query (slice) by dimension.
Expand Down
1 change: 1 addition & 0 deletions python-spec/testing/test_query_axis.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
@mark.parametrize(
["coords", "want"],
[
((), ()),
((slice(1, 10),), (slice(1, 10),)),
([0, 1, 2], (0, 1, 2)),
((slice(None), [0, 88, 1001]), (slice(None), (0, 88, 1001))),
Expand Down

0 comments on commit 6b414d7

Please sign in to comment.