refactor: Add CompliantSeries.from_numpy#2196
Conversation
All other backends already had these
- Less repetition, but also helps document what the 2nd `TypeVar` is for (`from_`) - It has to be in that position to follow the rules of https://typing.python.org/en/latest/spec/generics.html#default-ordering-and-subscription-rules
- We've already got the compat handled there - `polars` handles the rest in https://github.com/pola-rs/polars/blob/889a2a7a57be5da432b6fa854ab698bbaf1b02ff/py-polars/polars/series/series.py#L1357-L1399
| if isinstance(arg, Series): | ||
| return arg._compliant_series._to_expr() | ||
| if isinstance(arg, Expr): | ||
| return arg._to_compliant_expr(self.__narwhals_namespace__()) | ||
| return arg._to_compliant_expr(self.__narwhals_namespace__()) # comment | ||
| if isinstance(arg, str): | ||
| return plx.col(arg) | ||
| if get_polars() is not None and "polars" in str(type(arg)): # pragma: no cover |
There was a problem hiding this comment.
I think that starting from here we should be handing off entirely to a CompliantNamespace method.
In addition to simplifying this part - we could reuse that again instead of:
narwhals/narwhals/_expression_parsing.py
Lines 93 to 107 in 7611bd4
Then everywhere we currently do:
from narwhals._expression_parsing import extract_compliant
plx: CompliantNamespace
extract_compliant(plx, ...)
Would be something like this:
plx: CompliantNamespace
plx._extract_compliant(...)
There was a problem hiding this comment.
I had some notes on this locally:
from __future__ import annotations
from typing import Any, Protocol
class ParseCompliant(Protocol):
"""Somewhat of an extended [polars._utils.parse.parse_into_expression]
Covers cases that are similar, but the latter is narrower:
- `nw.dataframe.BaseFrame._extract_compliant`
- `nw._expression_parsing.extract_compliant`
General
- Most usage requires a ref to `__narwhals_namespace__`
- Series/BaseFrame can convert internally
[polars._utils.parse.parse_into_expression]: https://github.com/pola-rs/polars/blob/9092a0e90005aa98077217f01e725ac4f386a335/py-polars/polars/_utils/parse/expr.py#L20-L63
""" # noqa: D415
def _parse_compliant(self, arg: Any, /) -> Any: ...
| @classmethod | ||
| def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self: ... |
There was a problem hiding this comment.
Linking this back to nw.functions.new_series, we might wanna have this as:
@classmethod
def from_numpy(
cls,
data: Into1DArray,
/,
*,
context: _FullContext,
name: str = "",
dtype: DType | type[DType] | None = None,
) -> Self: ...
narwhals/narwhals/functions.py
Lines 188 to 194 in 7611bd4
| else: # pragma: no cover | ||
| import sys | ||
| from importlib.util import find_spec | ||
|
|
||
| if sys.version_info >= (3, 13): | ||
| from typing import TypeVar | ||
| elif find_spec("typing_extensions"): | ||
| from typing_extensions import TypeVar | ||
| else: | ||
| from typing import TypeVar as _TypeVar | ||
|
|
||
| def TypeVar( # noqa: ANN202, N802 | ||
| name: str, | ||
| *constraints: Any, | ||
| bound: Any | None = None, | ||
| covariant: bool = False, | ||
| contravariant: bool = False, | ||
| **kwds: Any, # noqa: ARG001 | ||
| ): | ||
| return _TypeVar( | ||
| name, | ||
| *constraints, | ||
| bound=bound, | ||
| covariant=covariant, | ||
| contravariant=contravariant, | ||
| ) |
There was a problem hiding this comment.
This is a trick to get TypeVar defaults - but in a more reusable way than (#2110 (comment))
I've used them heavily when testing out the .(to|from_) protocols across the rest of the API
Peek
# NOTE: `nw.dataframe.DataFrame`
class NarwhalsDataFrame(
ArrowConvertible[_ArrowTable, IntoArrowTable],
PandasConvertible[_PandasDataFrame],
PolarsConvertible[_PolarsDataFrame],
NumpyConvertible[_2DArray],
NarwhalsDictCovertible,
NarwhalsNativeConvertible[NativeDataFrameT],
CompliantConvertible["CompliantDataFrame_[NativeDataFrameT, CompliantSeries_T]"],
ParseCompliant,
Generic[NativeDataFrameT, CompliantSeries_T],
):
| class NumpyConvertible( | ||
| ToNumpy[ToNumpyT_co], | ||
| FromNumpy[FromNumpyDT_contra], | ||
| Protocol[ToNumpyT_co, FromNumpyDT_contra], | ||
| ): | ||
| def to_numpy(self, dtype: Any, *, copy: bool | None) -> ToNumpyT_co: ... |
There was a problem hiding this comment.
In relation to (https://github.com/narwhals-dev/narwhals/pull/2196/files#r1993954699) - we could then do stuff like this which uses the same TypeVar twice:
from narwhals._translate import NumpyConvertible
from narwhals.typing import _2DArray
NumpyConvertible[_2DArray]
There was a problem hiding this comment.
I think that's closer to what we'd want on CompliantDataFrame and https://narwhals-dev.github.io/narwhals/api-reference/narwhals/#narwhals.from_numpy
- Only needs to be the extra stuff - `_create_compliant_series` is removed in #2196
| @classmethod | ||
| def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self: | ||
| return cls( | ||
| pl.Series(data if is_numpy_array_1d(data) else [data]), |
There was a problem hiding this comment.
not sure I follow this condition, where does it come from in the current code?
There was a problem hiding this comment.
Ah yeah this might look strange in PolarsSeries.
It seems there isn't an equivalent of ._from_scalar(value=...)
So it is mainly to match the logic of the other backends:
narwhals/narwhals/_arrow/series.py
Lines 141 to 165 in 6a5ed1d
narwhals/narwhals/_compliant/series.py
Lines 57 to 63 in 6a5ed1d
narwhals/narwhals/_pandas_like/series.py
Lines 175 to 205 in 6a5ed1d
There was a problem hiding this comment.
I imagine we'd probably end up having CompliantSeries with @classmethod's like:
CompliantSeries.from_numpy
CompliantSeries.from_iterable
CompliantSeries.from_scalar
# Maybe more that aren't relevant here
Where they might have overlapping and/or default implementations higher up in the protocol.
E.g. (5d609a7)
There was a problem hiding this comment.
ok, i see from the type hint that it's clear actually, we only get here with numpy scalars or numpy 1d arrays
There was a problem hiding this comment.
|
@MarcoGorelli this would be the start of putting it all together.
Full diff
diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py
index ed1d83b9..c31f2c5b 100644
--- a/narwhals/_compliant/dataframe.py
+++ b/narwhals/_compliant/dataframe.py
@@ -12,6 +12,7 @@ from typing import TypeVar
from narwhals._compliant.typing import CompliantSeriesT_co
from narwhals._compliant.typing import EagerSeriesT
from narwhals._expression_parsing import evaluate_output_names_and_aliases
+from narwhals._translate import NumpyConvertible
if TYPE_CHECKING:
from typing_extensions import Self
@@ -19,13 +20,14 @@ if TYPE_CHECKING:
from narwhals._compliant.expr import EagerExpr
from narwhals.dtypes import DType
+ from narwhals.typing import _2DArray # noqa: F401
__all__ = ["CompliantDataFrame", "CompliantLazyFrame", "EagerDataFrame"]
T = TypeVar("T")
-class CompliantDataFrame(Protocol[CompliantSeriesT_co]):
+class CompliantDataFrame(NumpyConvertible["_2DArray"], Protocol[CompliantSeriesT_co]):
def __narwhals_dataframe__(self) -> Self: ...
def __narwhals_namespace__(self) -> Any: ...
def simple_select(
diff --git a/narwhals/_compliant/namespace.py b/narwhals/_compliant/namespace.py
index f5449ec4..338306b6 100644
--- a/narwhals/_compliant/namespace.py
+++ b/narwhals/_compliant/namespace.py
@@ -6,13 +6,17 @@ from typing import Any
from typing import Container
from typing import Iterable
from typing import Literal
+from typing import Mapping
from typing import Protocol
+from typing import Sequence
+from typing import overload
from narwhals._compliant.typing import CompliantExprT
from narwhals._compliant.typing import CompliantFrameT
from narwhals._compliant.typing import EagerDataFrameT
from narwhals._compliant.typing import EagerExprT
from narwhals._compliant.typing import EagerSeriesT_co
+from narwhals.dependencies import is_numpy_array_2d
from narwhals.utils import exclude_column_names
from narwhals.utils import get_column_names
from narwhals.utils import passthrough_column_names
@@ -20,6 +24,9 @@ from narwhals.utils import passthrough_column_names
if TYPE_CHECKING:
from narwhals._compliant.selectors import CompliantSelectorNamespace
from narwhals.dtypes import DType
+ from narwhals.schema import Schema
+ from narwhals.typing import Into1DArray
+ from narwhals.typing import _2DArray
from narwhals.utils import Implementation
from narwhals.utils import Version
@@ -84,3 +91,29 @@ class EagerNamespace(
):
@property
def _series(self) -> type[EagerSeriesT_co]: ...
+ @property
+ def _dataframe(self) -> type[EagerDataFrameT]: ...
+
+ @overload
+ def from_numpy(
+ self,
+ data: Into1DArray,
+ /,
+ schema: None = ...,
+ ) -> EagerSeriesT_co: ...
+ @overload
+ def from_numpy(
+ self,
+ data: _2DArray,
+ /,
+ schema: Mapping[str, DType] | Schema | Sequence[str],
+ ) -> EagerDataFrameT: ...
+ def from_numpy(
+ self,
+ data: Into1DArray | _2DArray,
+ /,
+ schema: Mapping[str, DType] | Schema | Sequence[str] | None = None,
+ ) -> EagerSeriesT_co | EagerDataFrameT:
+ if is_numpy_array_2d(data):
+ return self._dataframe.from_numpy(data, schema=schema)
+ return self._series.from_numpy(data, context=self)
Just the fun stuff
class EagerNamespace(
CompliantNamespace[EagerDataFrameT, EagerExprT],
Protocol[EagerDataFrameT, EagerSeriesT_co, EagerExprT],
):
@property
def _series(self) -> type[EagerSeriesT_co]: ...
@property
def _dataframe(self) -> type[EagerDataFrameT]: ...
def from_numpy( # <--------------- regular method for `Namespace` only
self,
data: Into1DArray | _2DArray,
/,
schema: Mapping[str, DType] | Schema | Sequence[str] | None = None,
) -> EagerSeriesT_co | EagerDataFrameT:
if is_numpy_array_2d(data):
return self._dataframe.from_numpy(data, schema=schema)
return self._series.from_numpy(data, context=self)
I really think we can get some mileage out of this pattern
|
cool thanks! merge when ready |

What type of PR is this? (check all applicable)
Related issues
`_compliant` sub-package #2149 (comment)
`_compliant` sub-package #2149 (comment)
Checklist
If you have comments or can explain your changes, please do so below
from_numpy
`nw._translate.py` would be the home for similar protocols