From 4c22a90fbd4e09c077c71904e9255614a889e7b7 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 4 Aug 2025 20:01:30 +0000 Subject: [PATCH 01/19] fix everything except `join`, `v1`, `v2` - 76 errors in `tests/frame/join_test.py` - stable errors are expected, as they aren't using updated typing yet --- narwhals/_compliant/dataframe.py | 17 +++++++++++------ narwhals/_compliant/typing.py | 2 ++ narwhals/_namespace.py | 8 +++----- narwhals/functions.py | 21 +++++++++++++-------- narwhals/translate.py | 13 +++++++++---- narwhals/typing.py | 4 +++- tests/conftest.py | 14 +++++++------- tests/utils.py | 6 +++--- 8 files changed, 51 insertions(+), 34 deletions(-) diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py index ba47d7b824..2e8345d5f3 100644 --- a/narwhals/_compliant/dataframe.py +++ b/narwhals/_compliant/dataframe.py @@ -11,6 +11,7 @@ CompliantSeriesT, EagerExprT, EagerSeriesT, + NativeDataFrameT, NativeFrameT, NativeSeriesT, ) @@ -353,10 +354,12 @@ def with_row_index(self, name: str, order_by: Sequence[str]) -> Self: ... class EagerDataFrame( - CompliantDataFrame[EagerSeriesT, EagerExprT, NativeFrameT, "DataFrame[NativeFrameT]"], - CompliantLazyFrame[EagerExprT, NativeFrameT, "DataFrame[NativeFrameT]"], + CompliantDataFrame[ + EagerSeriesT, EagerExprT, NativeDataFrameT, "DataFrame[NativeDataFrameT]" + ], + CompliantLazyFrame[EagerExprT, NativeDataFrameT, "DataFrame[NativeDataFrameT]"], ValidateBackendVersion, - Protocol[EagerSeriesT, EagerExprT, NativeFrameT, NativeSeriesT], + Protocol[EagerSeriesT, EagerExprT, NativeDataFrameT, NativeSeriesT], ): @property def _backend_version(self) -> tuple[int, ...]: @@ -364,13 +367,15 @@ def _backend_version(self) -> tuple[int, ...]: def __narwhals_namespace__( self, - ) -> EagerNamespace[Self, EagerSeriesT, EagerExprT, NativeFrameT, NativeSeriesT]: ... 
+ ) -> EagerNamespace[ + Self, EagerSeriesT, EagerExprT, NativeDataFrameT, NativeSeriesT + ]: ... - def to_narwhals(self) -> DataFrame[NativeFrameT]: + def to_narwhals(self) -> DataFrame[NativeDataFrameT]: return self._version.dataframe(self, level="full") def _with_native( - self, df: NativeFrameT, *, validate_column_names: bool = True + self, df: NativeDataFrameT, *, validate_column_names: bool = True ) -> Self: ... def _check_columns_exist(self, subset: Sequence[str]) -> ColumnNotFoundError | None: diff --git a/narwhals/_compliant/typing.py b/narwhals/_compliant/typing.py index 0fb3c301e9..fd9bcc5546 100644 --- a/narwhals/_compliant/typing.py +++ b/narwhals/_compliant/typing.py @@ -23,6 +23,7 @@ from narwhals._compliant.window import WindowInputs from narwhals.typing import ( FillNullStrategy, + NativeDataFrame, NativeFrame, NativeSeries, RankMethod, @@ -90,6 +91,7 @@ class ScalarKwargs(TypedDict, total=False): NativeSeriesT_contra = TypeVar( "NativeSeriesT_contra", bound="NativeSeries", contravariant=True ) +NativeDataFrameT = TypeVar("NativeDataFrameT", bound="NativeDataFrame") NativeFrameT = TypeVar("NativeFrameT", bound="NativeFrame") NativeFrameT_co = TypeVar("NativeFrameT_co", bound="NativeFrame", covariant=True) NativeFrameT_contra = TypeVar( diff --git a/narwhals/_namespace.py b/narwhals/_namespace.py index 94dcca16d5..07b3cca3ce 100644 --- a/narwhals/_namespace.py +++ b/narwhals/_namespace.py @@ -50,7 +50,7 @@ from narwhals._polars.namespace import PolarsNamespace from narwhals._spark_like.dataframe import SQLFrameDataFrame from narwhals._spark_like.namespace import SparkLikeNamespace - from narwhals.typing import DataFrameLike, NativeFrame, NativeLazyFrame, NativeSeries + from narwhals.typing import NativeDataFrame, NativeLazyFrame, NativeSeries T = TypeVar("T") @@ -116,7 +116,7 @@ def rename(self, *args: Any, inplace: Literal[False], **kwds: Any) -> Self: """`inplace=False` is required to avoid (incorrect?) default overloads.""" ... 
- class _BasePandasLikeFrame(NativeFrame, _BasePandasLike, Protocol): ... + class _BasePandasLikeFrame(NativeDataFrame, _BasePandasLike, Protocol): ... class _BasePandasLikeSeries(NativeSeries, _BasePandasLike, Protocol): def where(self, cond: Any, other: Any = ..., **kwds: Any) -> Any: ... @@ -161,9 +161,7 @@ class _ModinSeries(_BasePandasLikeSeries, Protocol): ) NativeKnown: TypeAlias = "_NativePolars | _NativeArrow | _NativePandasLike | _NativeSparkLike | _NativeDuckDB | _NativeDask | _NativeIbis" - NativeUnknown: TypeAlias = ( - "NativeFrame | NativeSeries | NativeLazyFrame | DataFrameLike" - ) + NativeUnknown: TypeAlias = "NativeDataFrame | NativeSeries | NativeLazyFrame" NativeAny: TypeAlias = "NativeKnown | NativeUnknown" __all__ = ["Namespace"] diff --git a/narwhals/functions.py b/narwhals/functions.py index 5c0a462519..bc1120315c 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -42,6 +42,7 @@ from typing_extensions import TypeAlias, TypeIs from narwhals._compliant import CompliantExpr, CompliantNamespace + from narwhals._namespace import _NativeDuckDB, _NativeIbis from narwhals._translate import IntoArrowTable from narwhals.dataframe import DataFrame, LazyFrame from narwhals.dtypes import DType @@ -51,7 +52,7 @@ FrameT, IntoDType, IntoExpr, - NativeFrame, + NativeDataFrame, NativeLazyFrame, NativeSeries, NonNestedLiteral, @@ -301,7 +302,9 @@ def from_dict( try: # implementation is UNKNOWN, Narwhals extension using this feature should # implement `from_dict` function in the top-level namespace. - native_frame: NativeFrame = _native_namespace.from_dict(data, schema=schema) + native_frame: NativeDataFrame = _native_namespace.from_dict( + data, schema=schema + ) except AttributeError as e: msg = "Unknown namespace is expected to implement `from_dict` function." 
raise AttributeError(msg) from e @@ -397,7 +400,9 @@ def from_numpy( try: # implementation is UNKNOWN, Narwhals extension using this feature should # implement `from_numpy` function in the top-level namespace. - native_frame: NativeFrame = _native_namespace.from_numpy(data, schema=schema) + native_frame: NativeDataFrame = _native_namespace.from_numpy( + data, schema=schema + ) except AttributeError as e: msg = "Unknown namespace is expected to implement `from_numpy` function." raise AttributeError(msg) from e @@ -470,7 +475,7 @@ def from_arrow( try: # implementation is UNKNOWN, Narwhals extension using this feature should # implement PyCapsule support - native: NativeFrame = _native_namespace.DataFrame(native_frame) + native: NativeDataFrame = _native_namespace.DataFrame(native_frame) except AttributeError as e: msg = "Unknown namespace is expected to implement `DataFrame` class which accepts object which supports PyCapsule Interface." raise AttributeError(msg) from e @@ -594,7 +599,7 @@ def read_csv( """ eager_backend = Implementation.from_backend(backend) native_namespace = eager_backend.to_native_namespace() - native_frame: NativeFrame + native_frame: NativeDataFrame if eager_backend in { Implementation.POLARS, Implementation.PANDAS, @@ -657,7 +662,7 @@ def scan_csv( """ implementation = Implementation.from_backend(backend) native_namespace = implementation.to_native_namespace() - native_frame: NativeFrame | NativeLazyFrame + native_frame: NativeDataFrame | NativeLazyFrame if implementation is Implementation.POLARS: native_frame = native_namespace.scan_csv(source, **kwargs) elif implementation in { @@ -737,7 +742,7 @@ def read_parquet( """ implementation = Implementation.from_backend(backend) native_namespace = implementation.to_native_namespace() - native_frame: NativeFrame + native_frame: NativeDataFrame | _NativeIbis | _NativeDuckDB if implementation in { Implementation.POLARS, Implementation.PANDAS, @@ -829,7 +834,7 @@ def scan_parquet( """ implementation = 
Implementation.from_backend(backend) native_namespace = implementation.to_native_namespace() - native_frame: NativeFrame | NativeLazyFrame + native_frame: NativeDataFrame | NativeLazyFrame if implementation is Implementation.POLARS: native_frame = native_namespace.scan_parquet(source, **kwargs) elif implementation in { diff --git a/narwhals/translate.py b/narwhals/translate.py index 41aa01750d..fd28cb1e33 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -168,13 +168,13 @@ def from_native( @overload def from_native( - native_object: IntoFrameT | IntoLazyFrameT | IntoSeriesT, + native_object: IntoDataFrameT | IntoLazyFrameT | IntoSeriesT, *, pass_through: Literal[True], eager_only: Literal[False] = ..., series_only: Literal[False] = ..., allow_series: Literal[True], -) -> DataFrame[IntoFrameT] | LazyFrame[IntoLazyFrameT] | Series[IntoSeriesT]: ... +) -> DataFrame[IntoDataFrameT] | LazyFrame[IntoLazyFrameT] | Series[IntoSeriesT]: ... @overload @@ -259,14 +259,19 @@ def from_native( def from_native( # noqa: D417 - native_object: IntoLazyFrameT | IntoFrameT | IntoSeriesT | IntoFrame | IntoSeries | T, + native_object: IntoLazyFrameT + | IntoDataFrameT + | IntoSeriesT + | IntoFrame + | IntoSeries + | T, *, pass_through: bool = False, eager_only: bool = False, series_only: bool = False, allow_series: bool | None = None, **kwds: Any, -) -> LazyFrame[IntoLazyFrameT] | DataFrame[IntoFrameT] | Series[IntoSeriesT] | T: +) -> LazyFrame[IntoLazyFrameT] | DataFrame[IntoDataFrameT] | Series[IntoSeriesT] | T: """Convert `native_object` to Narwhals Dataframe, Lazyframe, or Series. Arguments: diff --git a/narwhals/typing.py b/narwhals/typing.py index 4ea3a5dbba..dcd43360ba 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -27,6 +27,8 @@ def columns(self) -> Any: ... def join(self, *args: Any, **kwargs: Any) -> Any: ... + class NativeDataFrame(Sized, NativeFrame, Protocol): ... 
+ class NativeLazyFrame(NativeFrame, Protocol): def explain(self, *args: Any, **kwargs: Any) -> Any: ... @@ -108,7 +110,7 @@ def Binary(self) -> type[dtypes.Binary]: ... which will be interpreted as a `nw.Expr`, e.g. `df.select('a')`. """ -IntoDataFrame: TypeAlias = Union["NativeFrame", "DataFrameLike"] +IntoDataFrame: TypeAlias = Union["NativeDataFrame", "DataFrameLike"] """Anything which can be converted to a Narwhals DataFrame. Use this if your function accepts a narwhalifiable object but doesn't care about its backend. diff --git a/tests/conftest.py b/tests/conftest.py index a55fa01270..b7c28642a8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -26,7 +26,7 @@ from narwhals._namespace import EagerAllowed from narwhals._spark_like.dataframe import SQLFrameDataFrame - from narwhals.typing import NativeFrame, NativeLazyFrame + from narwhals.typing import NativeDataFrame, NativeLazyFrame from tests.utils import Constructor, ConstructorEager, ConstructorLazy Data: TypeAlias = "dict[str, list[Any]]" @@ -100,27 +100,27 @@ def pandas_pyarrow_constructor(obj: Data) -> pd.DataFrame: return pd.DataFrame(obj).convert_dtypes(dtype_backend="pyarrow") -def modin_constructor(obj: Data) -> NativeFrame: # pragma: no cover +def modin_constructor(obj: Data) -> NativeDataFrame: # pragma: no cover import modin.pandas as mpd import pandas as pd df = mpd.DataFrame(pd.DataFrame(obj)) - return cast("NativeFrame", df) + return cast("NativeDataFrame", df) -def modin_pyarrow_constructor(obj: Data) -> NativeFrame: # pragma: no cover +def modin_pyarrow_constructor(obj: Data) -> NativeDataFrame: # pragma: no cover import modin.pandas as mpd import pandas as pd df = mpd.DataFrame(pd.DataFrame(obj)).convert_dtypes(dtype_backend="pyarrow") - return cast("NativeFrame", df) + return cast("NativeDataFrame", df) -def cudf_constructor(obj: Data) -> NativeFrame: # pragma: no cover +def cudf_constructor(obj: Data) -> NativeDataFrame: # pragma: no cover import cudf df = cudf.DataFrame(obj) - 
return cast("NativeFrame", df) + return cast("NativeDataFrame", df) def polars_eager_constructor(obj: Data) -> pl.DataFrame: diff --git a/tests/utils.py b/tests/utils.py index 482e4362e1..15bb35f9a2 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -20,7 +20,7 @@ from typing_extensions import TypeAlias - from narwhals.typing import DataFrameLike, Frame, NativeFrame, NativeLazyFrame + from narwhals.typing import Frame, NativeDataFrame, NativeLazyFrame def get_module_version_as_tuple(module_name: str) -> tuple[int, ...]: @@ -40,8 +40,8 @@ def get_module_version_as_tuple(module_name: str) -> tuple[int, ...]: PYSPARK_VERSION: tuple[int, ...] = get_module_version_as_tuple("pyspark") CUDF_VERSION: tuple[int, ...] = get_module_version_as_tuple("cudf") -Constructor: TypeAlias = Callable[[Any], "NativeLazyFrame | NativeFrame | DataFrameLike"] -ConstructorEager: TypeAlias = Callable[[Any], "NativeFrame | DataFrameLike"] +Constructor: TypeAlias = Callable[[Any], "NativeLazyFrame | NativeDataFrame"] +ConstructorEager: TypeAlias = Callable[[Any], "NativeDataFrame"] ConstructorLazy: TypeAlias = Callable[[Any], "NativeLazyFrame"] From b5f76f04e9e70220847aa5622bdc4318dccc75c1 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 4 Aug 2025 20:27:02 +0000 Subject: [PATCH 02/19] fix(typing): Get `mypy` happier Still same issues on `join`, `v1`, `v2` --- narwhals/translate.py | 5 +---- tests/expr_and_series/struct_/field_test.py | 4 +++- tests/frame/sample_test.py | 4 ++-- tests/frame/with_row_index_test.py | 2 +- tests/preserve_pandas_like_columns_name_attr_test.py | 10 +++++----- 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/narwhals/translate.py b/narwhals/translate.py index fd28cb1e33..70767af6e0 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -188,11 +188,8 @@ def from_native( ) -> Series[IntoSeriesT]: ... 
-# NOTE: Seems like `mypy` is giving a false positive -# Following this advice will introduce overlapping overloads? -# > note: Flipping the order of overloads will fix this error @overload -def from_native( # type: ignore[overload-overlap] +def from_native( native_object: IntoLazyFrameT, *, pass_through: Literal[False] = ..., diff --git a/tests/expr_and_series/struct_/field_test.py b/tests/expr_and_series/struct_/field_test.py index 0945ebe8ce..fdfa8747ce 100644 --- a/tests/expr_and_series/struct_/field_test.py +++ b/tests/expr_and_series/struct_/field_test.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import cast + import pandas as pd import pyarrow as pa import pytest @@ -53,7 +55,7 @@ def test_get_field_series( df_native = constructor_eager(data) if "pandas" in str(constructor_eager): - df_native = df_native.assign( # type: ignore[union-attr] + df_native = cast("pd.DataFrame", df_native).assign( user=pd.Series( data["user"], dtype=pd.ArrowDtype( diff --git a/tests/frame/sample_test.py b/tests/frame/sample_test.py index 46196b13ec..b86ddaee1d 100644 --- a/tests/frame/sample_test.py +++ b/tests/frame/sample_test.py @@ -36,5 +36,5 @@ def test_sample_with_seed(constructor_eager: ConstructorEager) -> None: r2 = nw.to_native(df.sample(n=n, seed=123)) r3 = nw.to_native(df.sample(n=n, seed=42)) - assert r1.equals(r2) # type: ignore[union-attr] - assert not r1.equals(r3) # type: ignore[union-attr] + assert r1.equals(r2) # type: ignore[attr-defined] + assert not r1.equals(r3) # type: ignore[attr-defined] diff --git a/tests/frame/with_row_index_test.py b/tests/frame/with_row_index_test.py index eead211910..79f43ceeab 100644 --- a/tests/frame/with_row_index_test.py +++ b/tests/frame/with_row_index_test.py @@ -58,7 +58,7 @@ def test_with_row_index_lazy_exception(constructor: Constructor) -> None: ) with context: - result = frame.with_row_index() + result = frame.with_row_index() # type: ignore[call-arg] expected = {"index": [0, 1], **data} 
assert_equal_data(result, expected) diff --git a/tests/preserve_pandas_like_columns_name_attr_test.py b/tests/preserve_pandas_like_columns_name_attr_test.py index cc7195e688..3127040bee 100644 --- a/tests/preserve_pandas_like_columns_name_attr_test.py +++ b/tests/preserve_pandas_like_columns_name_attr_test.py @@ -1,17 +1,17 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Callable import pytest import narwhals as nw if TYPE_CHECKING: - from tests.utils import Constructor + import pandas as pd def test_ops_preserve_column_index_name( - constructor: Constructor, request: pytest.FixtureRequest + constructor: Callable[..., pd.DataFrame], request: pytest.FixtureRequest ) -> None: if not any(x in str(constructor) for x in ("pandas", "modin", "cudf", "dask")): pytest.skip( @@ -23,11 +23,11 @@ def test_ops_preserve_column_index_name( data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]} df_native = constructor(data) - df_native.columns.name = "foo" # type: ignore[union-attr] + df_native.columns.name = "foo" df = nw.from_native(df_native) result = df.with_columns(b=nw.col("a") + 1, c=nw.col("a") * 2).select("c", "b") - assert result.to_native().columns.name == "foo" # type: ignore[union-attr] + assert result.to_native().columns.name == "foo" assert result.lazy().collect(backend="pandas").to_native().columns.name == "foo" From 2b3d2bff07e7ab3e17135e49414d8cc75eb2e529 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 4 Aug 2025 21:01:36 +0000 Subject: [PATCH 03/19] fix(typing): Finish `v1` --- narwhals/stable/v1/__init__.py | 120 ++++++++++++++++++++------------- narwhals/stable/v1/typing.py | 5 +- tests/v1_test.py | 9 +-- 3 files changed, 80 insertions(+), 54 deletions(-) diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 7748fb02ee..018c476ff3 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py 
@@ -60,7 +60,7 @@ Unknown, ) from narwhals.translate import _from_native_impl, get_native_namespace, to_py_scalar -from narwhals.typing import IntoDataFrameT, IntoFrameT +from narwhals.typing import IntoDataFrameT, IntoFrameT, IntoLazyFrameT if TYPE_CHECKING: from collections.abc import Iterable, Mapping, Sequence @@ -75,7 +75,6 @@ IntoDType, IntoExpr, IntoFrame, - IntoLazyFrameT, IntoSeries, NonNestedLiteral, SingleColSelector, @@ -212,7 +211,7 @@ def _l1_norm(self) -> Self: return self.select(all()._l1_norm()) -class LazyFrame(NwLazyFrame[IntoFrameT]): +class LazyFrame(NwLazyFrame[IntoLazyFrameT]): @inherit_doc(NwLazyFrame) def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None: assert df._version is Version.V1 # noqa: S101 @@ -454,9 +453,9 @@ def __init__( @overload -def _stableify(obj: NwDataFrame[IntoFrameT]) -> DataFrame[IntoFrameT]: ... +def _stableify(obj: NwDataFrame[IntoDataFrameT]) -> DataFrame[IntoDataFrameT]: ... @overload -def _stableify(obj: NwLazyFrame[IntoFrameT]) -> LazyFrame[IntoFrameT]: ... +def _stableify(obj: NwLazyFrame[IntoLazyFrameT]) -> LazyFrame[IntoLazyFrameT]: ... @overload def _stableify(obj: NwSeries[IntoSeriesT]) -> Series[IntoSeriesT]: ... @overload @@ -464,11 +463,11 @@ def _stableify(obj: NwExpr) -> Expr: ... 
def _stableify( - obj: NwDataFrame[IntoFrameT] - | NwLazyFrame[IntoFrameT] + obj: NwDataFrame[IntoDataFrameT] + | NwLazyFrame[IntoLazyFrameT] | NwSeries[IntoSeriesT] | NwExpr, -) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series[IntoSeriesT] | Expr: +) -> DataFrame[IntoDataFrameT] | LazyFrame[IntoLazyFrameT] | Series[IntoSeriesT] | Expr: if isinstance(obj, NwDataFrame): return DataFrame(obj._compliant_frame._with_version(Version.V1), level=obj._level) if isinstance(obj, NwLazyFrame): @@ -572,14 +571,14 @@ def from_native( @overload def from_native( - native_object: IntoFrameT | IntoSeriesT, + native_object: IntoDataFrameT | IntoLazyFrameT | IntoSeriesT, *, strict: Literal[False], eager_only: Literal[False] = ..., eager_or_interchange_only: Literal[False] = ..., series_only: Literal[False] = ..., allow_series: Literal[True], -) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series[IntoSeriesT]: ... +) -> DataFrame[IntoDataFrameT] | LazyFrame[IntoLazyFrameT] | Series[IntoSeriesT]: ... @overload @@ -596,14 +595,26 @@ def from_native( @overload def from_native( - native_object: IntoFrameT, + native_object: IntoDataFrameT, + *, + strict: Literal[False], + eager_only: Literal[False] = ..., + eager_or_interchange_only: Literal[False] = ..., + series_only: Literal[False] = ..., + allow_series: None = ..., +) -> DataFrame[IntoDataFrameT]: ... + + +@overload +def from_native( + native_object: IntoLazyFrameT, *, strict: Literal[False], eager_only: Literal[False] = ..., eager_or_interchange_only: Literal[False] = ..., series_only: Literal[False] = ..., allow_series: None = ..., -) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]: ... +) -> LazyFrame[IntoLazyFrameT]: ... 
@overload @@ -622,7 +633,7 @@ def from_native( def from_native( native_object: IntoDataFrameT, *, - strict: Literal[True] = ..., + strict: Literal[True] | None = ..., eager_only: Literal[False] = ..., eager_or_interchange_only: Literal[True], series_only: Literal[False] = ..., @@ -630,11 +641,23 @@ def from_native( ) -> DataFrame[IntoDataFrameT]: ... +@overload +def from_native( + native_object: IntoLazyFrameT, + *, + strict: Literal[True] | None = ..., + eager_only: Literal[False] = ..., + eager_or_interchange_only: Literal[False] = ..., + series_only: Literal[False] = ..., + allow_series: None = ..., +) -> LazyFrame[IntoLazyFrameT]: ... + + @overload def from_native( native_object: IntoDataFrameT, *, - strict: Literal[True] = ..., + strict: Literal[True] | None = ..., eager_only: Literal[True], eager_or_interchange_only: Literal[False] = ..., series_only: Literal[False] = ..., @@ -646,7 +669,7 @@ def from_native( def from_native( native_object: IntoFrame | IntoSeries, *, - strict: Literal[True] = ..., + strict: Literal[True] | None = ..., eager_only: Literal[False] = ..., eager_or_interchange_only: Literal[False] = ..., series_only: Literal[False] = ..., @@ -658,7 +681,7 @@ def from_native( def from_native( native_object: IntoSeriesT, *, - strict: Literal[True] = ..., + strict: Literal[True] | None = ..., eager_only: Literal[False] = ..., eager_or_interchange_only: Literal[False] = ..., series_only: Literal[True], @@ -668,27 +691,14 @@ def from_native( @overload def from_native( - native_object: IntoLazyFrameT, - *, - strict: Literal[True] = ..., - eager_only: Literal[False] = ..., - eager_or_interchange_only: Literal[False] = ..., - series_only: Literal[False] = ..., - allow_series: None = ..., -) -> LazyFrame[IntoLazyFrameT]: ... 
- - -# NOTE: `pl.LazyFrame` originally matched here -@overload -def from_native( - native_object: IntoFrameT, + native_object: IntoDataFrameT, *, - strict: Literal[True] = ..., + strict: Literal[True] | None = ..., eager_only: Literal[False] = ..., eager_or_interchange_only: Literal[False] = ..., series_only: Literal[False] = ..., allow_series: None = ..., -) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]: ... +) -> DataFrame[IntoDataFrameT]: ... @overload @@ -765,14 +775,14 @@ def from_native( @overload def from_native( - native_object: IntoFrameT | IntoSeriesT, + native_object: IntoDataFrameT | IntoLazyFrameT | IntoSeriesT, *, pass_through: Literal[True], eager_only: Literal[False] = ..., eager_or_interchange_only: Literal[False] = ..., series_only: Literal[False] = ..., allow_series: Literal[True], -) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series[IntoSeriesT]: ... +) -> DataFrame[IntoDataFrameT] | LazyFrame[IntoLazyFrameT] | Series[IntoSeriesT]: ... @overload @@ -789,14 +799,14 @@ def from_native( @overload def from_native( - native_object: IntoFrameT, + native_object: IntoDataFrameT | IntoLazyFrameT, *, pass_through: Literal[True], eager_only: Literal[False] = ..., eager_or_interchange_only: Literal[False] = ..., series_only: Literal[False] = ..., allow_series: None = ..., -) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]: ... +) -> DataFrame[IntoDataFrameT] | LazyFrame[IntoLazyFrameT]: ... @overload @@ -861,14 +871,26 @@ def from_native( @overload def from_native( - native_object: IntoFrameT, + native_object: IntoDataFrameT, + *, + pass_through: Literal[False] = ..., + eager_only: Literal[False] = ..., + eager_or_interchange_only: Literal[False] = ..., + series_only: Literal[False] = ..., + allow_series: None = ..., +) -> DataFrame[IntoDataFrameT]: ... 
+ + +@overload +def from_native( + native_object: IntoLazyFrameT, *, pass_through: Literal[False] = ..., eager_only: Literal[False] = ..., eager_or_interchange_only: Literal[False] = ..., series_only: Literal[False] = ..., allow_series: None = ..., -) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]: ... +) -> LazyFrame[IntoLazyFrameT]: ... # All params passed in as variables @@ -885,7 +907,12 @@ def from_native( def from_native( - native_object: IntoFrameT | IntoFrame | IntoSeriesT | IntoSeries | T, + native_object: IntoDataFrameT + | IntoLazyFrameT + | IntoFrame + | IntoSeriesT + | IntoSeries + | T, *, strict: bool | None = None, pass_through: bool | None = None, @@ -894,7 +921,7 @@ def from_native( series_only: bool = False, allow_series: bool | None = None, **kwds: Any, -) -> LazyFrame[IntoFrameT] | DataFrame[IntoFrameT] | Series[IntoSeriesT] | T: +) -> LazyFrame[IntoLazyFrameT] | DataFrame[IntoDataFrameT] | Series[IntoSeriesT] | T: """Convert `native_object` to Narwhals Dataframe, Lazyframe, or Series. See `narwhals.from_native` for full docstring. Note that `native_namespace` is @@ -931,8 +958,8 @@ def to_native( ) -> IntoDataFrameT: ... @overload def to_native( - narwhals_object: LazyFrame[IntoFrameT], *, strict: Literal[True] = ... -) -> IntoFrameT: ... + narwhals_object: LazyFrame[IntoLazyFrameT], *, strict: Literal[True] = ... +) -> IntoLazyFrameT: ... @overload def to_native( narwhals_object: Series[IntoSeriesT], *, strict: Literal[True] = ... @@ -945,8 +972,8 @@ def to_native( ) -> IntoDataFrameT: ... @overload def to_native( - narwhals_object: LazyFrame[IntoFrameT], *, pass_through: Literal[False] = ... -) -> IntoFrameT: ... + narwhals_object: LazyFrame[IntoLazyFrameT], *, pass_through: Literal[False] = ... +) -> IntoLazyFrameT: ... @overload def to_native( narwhals_object: Series[IntoSeriesT], *, pass_through: Literal[False] = ... @@ -957,12 +984,12 @@ def to_native(narwhals_object: Any, *, pass_through: bool) -> Any: ... 
def to_native( narwhals_object: DataFrame[IntoDataFrameT] - | LazyFrame[IntoFrameT] + | LazyFrame[IntoLazyFrameT] | Series[IntoSeriesT], *, strict: bool | None = None, pass_through: bool | None = None, -) -> IntoFrameT | IntoSeriesT | Any: +) -> IntoLazyFrameT | IntoDataFrameT | IntoSeriesT | Any: """Convert Narwhals object to native one. See `narwhals.to_native` for full docstring. Note that `native_namespace` is @@ -1338,6 +1365,7 @@ def scan_parquet( "Int32", "Int64", "Int128", + "IntoFrameT", "LazyFrame", "List", "Object", diff --git a/narwhals/stable/v1/typing.py b/narwhals/stable/v1/typing.py index c3a0b421bc..d698dae520 100644 --- a/narwhals/stable/v1/typing.py +++ b/narwhals/stable/v1/typing.py @@ -4,6 +4,7 @@ if TYPE_CHECKING: import sys + from collections.abc import Sized from narwhals.stable.v1 import DataFrame, LazyFrame @@ -23,6 +24,8 @@ def columns(self) -> Any: ... def join(self, *args: Any, **kwargs: Any) -> Any: ... + class NativeDataFrame(Sized, NativeFrame, Protocol): ... + class NativeSeries(Protocol): def __len__(self) -> int: ... @@ -40,7 +43,7 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... `nw.Expr`, e.g. `df.select('a')`. """ -IntoDataFrame: TypeAlias = Union["NativeFrame", "DataFrame[Any]", "DataFrameLike"] +IntoDataFrame: TypeAlias = Union["NativeDataFrame", "DataFrame[Any]", "DataFrameLike"] """Anything which can be converted to a Narwhals DataFrame. Use this if your function accepts a narwhalifiable object but doesn't care about its backend. 
diff --git a/tests/v1_test.py b/tests/v1_test.py index db88b95a51..ae0591e36a 100644 --- a/tests/v1_test.py +++ b/tests/v1_test.py @@ -577,16 +577,11 @@ def test_dataframe_recursive_v1() -> None: if TYPE_CHECKING: assert_type(pl_frame, pl.DataFrame) - assert_type( - nw_frame, "nw_v1.DataFrame[pl.DataFrame] | nw_v1.LazyFrame[pl.DataFrame]" - ) + assert_type(nw_frame, "nw_v1.DataFrame[pl.DataFrame]") nw_frame_depth_2 = nw_v1.DataFrame(nw_frame, level="full") # type: ignore[var-annotated] assert_type(nw_frame_depth_2, nw_v1.DataFrame[Any]) # NOTE: Checking that the type is `DataFrame[Unknown]` - assert_type( - nw_frame_early_return, - "nw_v1.DataFrame[pl.DataFrame] | nw_v1.LazyFrame[pl.DataFrame]", - ) + assert_type(nw_frame_early_return, "nw_v1.DataFrame[pl.DataFrame]") def test_lazyframe_recursive_v1() -> None: From a57fb057d3c2c18547079bfe119f998f511ad6ea Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 4 Aug 2025 21:11:23 +0000 Subject: [PATCH 04/19] ignore everything in `join_test` Maybe these can be typed *some day*, but they were never safe to begin with --- tests/frame/join_test.py | 96 ++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 37 deletions(-) diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index e2c575b7b2..5e202aa04a 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -96,7 +96,11 @@ def test_full_join( df_left = nw.from_native(constructor(df1)) df_right = nw.from_native(constructor(df2)) result = df_left.join( - df_right, on=on, left_on=left_on, right_on=right_on, how="full" + df_right, # type: ignore[arg-type] + on=on, + left_on=left_on, + right_on=right_on, + how="full", ).sort("id", nulls_last=True) assert_equal_data(result, expected) @@ -135,12 +139,12 @@ def test_inner_join_two_keys(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) df_right = df result = df.join( - df_right, + df_right, # type: 
ignore[arg-type] left_on=["antananarivo", "bob"], right_on=["antananarivo", "bob"], how="inner", ) - result_on = df.join(df_right, on=["antananarivo", "bob"], how="inner") + result_on = df.join(df_right, on=["antananarivo", "bob"], how="inner") # type: ignore[arg-type] result = result.sort("idx").drop("idx_right") result_on = result_on.sort("idx").drop("idx_right") expected = { @@ -164,9 +168,12 @@ def test_inner_join_single_key(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) df_right = df result = df.join( - df_right, left_on="antananarivo", right_on="antananarivo", how="inner" + df_right, # type: ignore[arg-type] + left_on="antananarivo", + right_on="antananarivo", + how="inner", ).sort("idx") - result_on = df.join(df_right, on="antananarivo", how="inner").sort("idx") + result_on = df.join(df_right, on="antananarivo", how="inner").sort("idx") # type: ignore[arg-type] result = result.drop("idx_right") result_on = result_on.drop("idx_right") expected = { @@ -186,7 +193,7 @@ def test_cross_join(constructor: Constructor) -> None: pytest.skip() data = {"antananarivo": [1, 3, 2]} df = nw.from_native(constructor(data)) - result = df.join(df, how="cross").sort("antananarivo", "antananarivo_right") + result = df.join(df, how="cross").sort("antananarivo", "antananarivo_right") # type: ignore[arg-type] expected = { "antananarivo": [1, 1, 1, 2, 2, 2, 3, 3, 3], "antananarivo_right": [1, 2, 3, 1, 2, 3, 1, 2, 3], @@ -196,7 +203,7 @@ def test_cross_join(constructor: Constructor) -> None: with pytest.raises( ValueError, match="Can not pass `left_on`, `right_on` or `on` keys for cross join" ): - df.join(df, how="cross", left_on="antananarivo") + df.join(df, how="cross", left_on="antananarivo") # type: ignore[arg-type] @pytest.mark.parametrize("how", ["inner", "left"]) @@ -208,7 +215,7 @@ def test_suffix( df = nw.from_native(constructor(data)) df_right = df result = df.join( - df_right, + df_right, # type: ignore[arg-type] left_on=["antananarivo", "bob"], 
right_on=["antananarivo", "bob"], how=how, @@ -224,7 +231,7 @@ def test_cross_join_suffix(constructor: Constructor, suffix: str) -> None: pytest.skip() data = {"antananarivo": [1, 3, 2]} df = nw.from_native(constructor(data)) - result = df.join(df, how="cross", suffix=suffix).sort( + result = df.join(df, how="cross", suffix=suffix).sort( # type: ignore[arg-type] "antananarivo", f"antananarivo{suffix}" ) expected = { @@ -275,7 +282,7 @@ def test_anti_join( data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} df = nw.from_native(constructor(data)) other = df.filter(filter_expr) - result = df.join(other, how="anti", left_on=join_key, right_on=join_key) + result = df.join(other, how="anti", left_on=join_key, right_on=join_key) # type: ignore[arg-type] assert_equal_data(result, expected) @@ -313,7 +320,7 @@ def test_semi_join( data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} df = nw.from_native(constructor(data)) other = df.filter(filter_expr) - result = df.join(other, how="semi", left_on=join_key, right_on=join_key).sort( + result = df.join(other, how="semi", left_on=join_key, right_on=join_key).sort( # type: ignore[arg-type] "antananarivo" ) assert_equal_data(result, expected) @@ -346,7 +353,7 @@ def test_left_join(constructor: Constructor) -> None: } df_left = nw.from_native(constructor(data_left)) df_right = nw.from_native(constructor(data_right)) - result = df_left.join(df_right, left_on="bob", right_on="co", how="left") + result = df_left.join(df_right, left_on="bob", right_on="co", how="left") # type: ignore[arg-type] result = result.sort("idx") result = result.drop("idx_right") expected = { @@ -355,7 +362,7 @@ def test_left_join(constructor: Constructor) -> None: "idx": [0, 1, 2], "antananarivo_right": [1, 2, None], } - result_on_list = df_left.join(df_right, on=["antananarivo", "idx"], how="left") + result_on_list = df_left.join(df_right, on=["antananarivo", "idx"], how="left") # type: ignore[arg-type] 
result_on_list = result_on_list.sort("idx") expected_on_list = { "antananarivo": [1, 2, 3], @@ -373,7 +380,7 @@ def test_left_join_multiple_column(constructor: Constructor) -> None: df_left = nw.from_native(constructor(data_left)) df_right = nw.from_native(constructor(data_right)) result = df_left.join( - df_right, + df_right, # type: ignore[arg-type] left_on=["antananarivo", "bob"], right_on=["antananarivo", "c"], how="left", @@ -399,7 +406,7 @@ def test_left_join_overlapping_column(constructor: Constructor) -> None: } df_left = nw.from_native(constructor(data_left)) df_right = nw.from_native(constructor(data_right)) - result = df_left.join(df_right, left_on="bob", right_on="c", how="left").sort("idx") + result = df_left.join(df_right, left_on="bob", right_on="c", how="left").sort("idx") # type: ignore[arg-type] result = result.drop("idx_right") expected: dict[str, list[Any]] = { "antananarivo": [1, 2, 3], @@ -410,7 +417,7 @@ def test_left_join_overlapping_column(constructor: Constructor) -> None: "d_right": [1, 4, 2], } assert_equal_data(result, expected) - result = df_left.join(df_right, left_on="antananarivo", right_on="d", how="left") + result = df_left.join(df_right, left_on="antananarivo", right_on="d", how="left") # type: ignore[arg-type] result = result.sort("idx") result = result.drop("idx_right") expected = { @@ -500,9 +507,12 @@ def test_joinasof_numeric( constructor({"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) ).sort("antananarivo") result = df.join_asof( - df_right, left_on="antananarivo", right_on="antananarivo", strategy=strategy + df_right, # type: ignore[arg-type] + left_on="antananarivo", + right_on="antananarivo", + strategy=strategy, ) - result_on = df.join_asof(df_right, on="antananarivo", strategy=strategy) + result_on = df.join_asof(df_right, on="antananarivo", strategy=strategy) # type: ignore[arg-type] assert_equal_data(result.sort(by="antananarivo"), expected) assert_equal_data(result_on.sort(by="antananarivo"), expected) @@ 
-589,9 +599,12 @@ def test_joinasof_time( ) ).sort("datetime") result = df.join_asof( - df_right, left_on="datetime", right_on="datetime", strategy=strategy + df_right, # type: ignore[arg-type] + left_on="datetime", + right_on="datetime", + strategy=strategy, ) - result_on = df.join_asof(df_right, on="datetime", strategy=strategy) + result_on = df.join_asof(df_right, on="datetime", strategy=strategy) # type: ignore[arg-type] assert_equal_data(result.sort(by="datetime"), expected) assert_equal_data(result_on.sort(by="datetime"), expected) @@ -617,8 +630,8 @@ def test_joinasof_by(constructor: Constructor, request: pytest.FixtureRequest) - {"antananarivo": [1, 4, 5, 8], "bob": ["D", "D", "A", "F"], "d": [1, 3, 4, 1]} ) ).sort("antananarivo") - result = df.join_asof(df_right, on="antananarivo", by_left="bob", by_right="bob") - result_by = df.join_asof(df_right, on="antananarivo", by="bob") + result = df.join_asof(df_right, on="antananarivo", by_left="bob", by_right="bob") # type: ignore[arg-type] + result_by = df.join_asof(df_right, on="antananarivo", by="bob") # type: ignore[arg-type] expected = { "antananarivo": [1, 5, 7, 10], "bob": ["D", "D", "C", "A"], @@ -645,7 +658,10 @@ def test_joinasof_suffix( constructor({"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) ).sort("antananarivo") result = df.join_asof( - df_right, left_on="antananarivo", right_on="antananarivo", suffix="_y" + df_right, # type: ignore[arg-type] + left_on="antananarivo", + right_on="antananarivo", + suffix="_y", ) expected = {"antananarivo": [1, 5, 10], "val": ["a", "b", "c"], "val_y": [1, 3, 7]} assert_equal_data(result.sort(by="antananarivo"), expected) @@ -663,7 +679,10 @@ def test_joinasof_not_implemented( match=rf"Only the following strategies are supported: \('backward', 'forward', 'nearest'\); found '{strategy}'.", ): df.join_asof( - df, left_on="antananarivo", right_on="antananarivo", strategy=strategy + df, # type: ignore[arg-type] + left_on="antananarivo", + 
right_on="antananarivo", + strategy=strategy, ) @@ -675,31 +694,34 @@ def test_joinasof_keys_exceptions(constructor: Constructor) -> None: ValueError, match=r"Either \(`left_on` and `right_on`\) or `on` keys should be specified.", ): - df.join_asof(df, left_on="antananarivo") + df.join_asof(df, left_on="antananarivo") # type: ignore[arg-type] with pytest.raises( ValueError, match=r"Either \(`left_on` and `right_on`\) or `on` keys should be specified.", ): - df.join_asof(df, right_on="antananarivo") + df.join_asof(df, right_on="antananarivo") # type: ignore[arg-type] with pytest.raises( ValueError, match=r"Either \(`left_on` and `right_on`\) or `on` keys should be specified.", ): - df.join_asof(df) + df.join_asof(df) # type: ignore[arg-type] with pytest.raises( ValueError, match="If `on` is specified, `left_on` and `right_on` should be None." ): df.join_asof( - df, left_on="antananarivo", right_on="antananarivo", on="antananarivo" + df, # type: ignore[arg-type] + left_on="antananarivo", + right_on="antananarivo", + on="antananarivo", ) with pytest.raises( ValueError, match="If `on` is specified, `left_on` and `right_on` should be None." ): - df.join_asof(df, left_on="antananarivo", on="antananarivo") + df.join_asof(df, left_on="antananarivo", on="antananarivo") # type: ignore[arg-type] with pytest.raises( ValueError, match="If `on` is specified, `left_on` and `right_on` should be None." ): - df.join_asof(df, right_on="antananarivo", on="antananarivo") + df.join_asof(df, right_on="antananarivo", on="antananarivo") # type: ignore[arg-type] def test_joinasof_by_exceptions(constructor: Constructor) -> None: @@ -708,35 +730,35 @@ def test_joinasof_by_exceptions(constructor: Constructor) -> None: with pytest.raises( ValueError, match="If `by` is specified, `by_left` and `by_right` should be None." 
): - df.join_asof(df, on="antananarivo", by_left="bob", by_right="bob", by="bob") + df.join_asof(df, on="antananarivo", by_left="bob", by_right="bob", by="bob") # type: ignore[arg-type] with pytest.raises( ValueError, match="Can not specify only `by_left` or `by_right`, you need to specify both.", ): - df.join_asof(df, on="antananarivo", by_left="bob") + df.join_asof(df, on="antananarivo", by_left="bob") # type: ignore[arg-type] with pytest.raises( ValueError, match="Can not specify only `by_left` or `by_right`, you need to specify both.", ): - df.join_asof(df, on="antananarivo", by_right="bob") + df.join_asof(df, on="antananarivo", by_right="bob") # type: ignore[arg-type] with pytest.raises( ValueError, match="If `by` is specified, `by_left` and `by_right` should be None." ): - df.join_asof(df, on="antananarivo", by_left="bob", by="bob") + df.join_asof(df, on="antananarivo", by_left="bob", by="bob") # type: ignore[arg-type] with pytest.raises( ValueError, match="If `by` is specified, `by_left` and `by_right` should be None." ): - df.join_asof(df, on="antananarivo", by_right="bob", by="bob") + df.join_asof(df, on="antananarivo", by_right="bob", by="bob") # type: ignore[arg-type] with pytest.raises( ValueError, match="`by_left` and `by_right` must have the same length." ): df.join_asof( - df, + df, # type: ignore[arg-type] on="antananarivo", by_left=["antananarivo", "bob"], by_right=["antananarivo"], @@ -781,4 +803,4 @@ def test_join_duplicate_column_names( df = constructor({"a": [1, 2, 3, 4, 5], "b": [6, 6, 6, 6, 6]}) dfn = nw.from_native(df) with pytest.raises(exception): - dfn.join(dfn, on=["a"]).join(dfn, on=["a"]).lazy().collect() + dfn.join(dfn, on=["a"]).join(dfn, on=["a"]).lazy().collect() # type: ignore[arg-type] From 6f764a6601f3a3e5e300ffed1242b8792066234b Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 5 Aug 2025 18:52:05 +0000 Subject: [PATCH 05/19] fix(typing): `v2` happy? 
--- narwhals/stable/v2/__init__.py | 42 +++++++++++++++++++++++----------- narwhals/stable/v2/typing.py | 18 +++++++++++---- 2 files changed, 42 insertions(+), 18 deletions(-) diff --git a/narwhals/stable/v2/__init__.py b/narwhals/stable/v2/__init__.py index bbff47f0b0..00646a30f6 100644 --- a/narwhals/stable/v2/__init__.py +++ b/narwhals/stable/v2/__init__.py @@ -56,7 +56,7 @@ from narwhals.series import Series as NwSeries from narwhals.stable.v2 import dependencies, dtypes, selectors from narwhals.translate import _from_native_impl, get_native_namespace, to_py_scalar -from narwhals.typing import IntoDataFrameT, IntoFrameT +from narwhals.typing import IntoDataFrameT, IntoLazyFrameT if TYPE_CHECKING: from collections.abc import Iterable, Mapping, Sequence @@ -172,7 +172,7 @@ def is_unique(self) -> Series[Any]: return _stableify(super().is_unique()) -class LazyFrame(NwLazyFrame[IntoFrameT]): +class LazyFrame(NwLazyFrame[IntoLazyFrameT]): @inherit_doc(NwLazyFrame) def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None: assert df._version is Version.V2 # noqa: S101 @@ -238,9 +238,9 @@ def __init__( @overload -def _stableify(obj: NwDataFrame[IntoFrameT]) -> DataFrame[IntoFrameT]: ... +def _stableify(obj: NwDataFrame[IntoDataFrameT]) -> DataFrame[IntoDataFrameT]: ... @overload -def _stableify(obj: NwLazyFrame[IntoFrameT]) -> LazyFrame[IntoFrameT]: ... +def _stableify(obj: NwLazyFrame[IntoLazyFrameT]) -> LazyFrame[IntoLazyFrameT]: ... @overload def _stableify(obj: NwSeries[IntoSeriesT]) -> Series[IntoSeriesT]: ... @overload @@ -248,11 +248,11 @@ def _stableify(obj: NwExpr) -> Expr: ... 
def _stableify( - obj: NwDataFrame[IntoFrameT] - | NwLazyFrame[IntoFrameT] + obj: NwDataFrame[IntoDataFrameT] + | NwLazyFrame[IntoLazyFrameT] | NwSeries[IntoSeriesT] | NwExpr, -) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series[IntoSeriesT] | Expr: +) -> DataFrame[IntoDataFrameT] | LazyFrame[IntoLazyFrameT] | Series[IntoSeriesT] | Expr: if isinstance(obj, NwDataFrame): return DataFrame(obj._compliant_frame._with_version(Version.V2), level=obj._level) if isinstance(obj, NwLazyFrame): @@ -370,6 +370,17 @@ def from_native( ) -> DataFrame[IntoDataFrameT]: ... +@overload +def from_native( + native_object: IntoLazyFrameT, + *, + pass_through: Literal[False] = ..., + eager_only: Literal[False] = ..., + series_only: Literal[False] = ..., + allow_series: None = ..., +) -> LazyFrame[IntoLazyFrameT]: ... + + @overload def from_native( native_object: IntoDataFrameT, @@ -416,14 +427,19 @@ def from_native( def from_native( # noqa: D417 - native_object: IntoFrameT | IntoFrame | IntoSeriesT | IntoSeries | T, + native_object: IntoDataFrameT + | IntoLazyFrameT + | IntoFrame + | IntoSeriesT + | IntoSeries + | T, *, pass_through: bool = False, eager_only: bool = False, series_only: bool = False, allow_series: bool | None = None, **kwds: Any, -) -> LazyFrame[IntoFrameT] | DataFrame[IntoFrameT] | Series[IntoSeriesT] | T: +) -> LazyFrame[IntoLazyFrameT] | DataFrame[IntoDataFrameT] | Series[IntoSeriesT] | T: """Convert `native_object` to Narwhals Dataframe, Lazyframe, or Series. Arguments: @@ -480,8 +496,8 @@ def to_native( ) -> IntoDataFrameT: ... @overload def to_native( - narwhals_object: LazyFrame[IntoFrameT], *, pass_through: Literal[False] = ... -) -> IntoFrameT: ... + narwhals_object: LazyFrame[IntoLazyFrameT], *, pass_through: Literal[False] = ... +) -> IntoLazyFrameT: ... @overload def to_native( narwhals_object: Series[IntoSeriesT], *, pass_through: Literal[False] = ... @@ -492,11 +508,11 @@ def to_native(narwhals_object: Any, *, pass_through: bool) -> Any: ... 
def to_native( narwhals_object: DataFrame[IntoDataFrameT] - | LazyFrame[IntoFrameT] + | LazyFrame[IntoLazyFrameT] | Series[IntoSeriesT], *, pass_through: bool = False, -) -> IntoFrameT | IntoSeriesT | Any: +) -> IntoDataFrameT | IntoLazyFrameT | IntoSeriesT | Any: """Convert Narwhals object to native one. Arguments: diff --git a/narwhals/stable/v2/typing.py b/narwhals/stable/v2/typing.py index 16d42804c2..ac45701fdd 100644 --- a/narwhals/stable/v2/typing.py +++ b/narwhals/stable/v2/typing.py @@ -4,6 +4,7 @@ if TYPE_CHECKING: import sys + from collections.abc import Iterable, Sized from narwhals.stable.v2 import DataFrame, LazyFrame @@ -23,8 +24,13 @@ def columns(self) -> Any: ... def join(self, *args: Any, **kwargs: Any) -> Any: ... - class NativeSeries(Protocol): - def __len__(self) -> int: ... + class NativeDataFrame(Sized, NativeFrame, Protocol): ... + + class NativeLazyFrame(NativeFrame, Protocol): + def explain(self, *args: Any, **kwargs: Any) -> Any: ... + + class NativeSeries(Sized, Iterable[Any], Protocol): + def filter(self, *args: Any, **kwargs: Any) -> Any: ... IntoExpr: TypeAlias = Union["Expr", str, "Series[Any]"] @@ -37,7 +43,7 @@ def __len__(self) -> int: ... `nw.Expr`, e.g. `df.select('a')`. """ -IntoDataFrame: TypeAlias = Union["NativeFrame", "DataFrame[Any]"] +IntoDataFrame: TypeAlias = "NativeDataFrame" """Anything which can be converted to a Narwhals DataFrame. Use this if your function accepts a narwhalifiable object but doesn't care about its backend. @@ -50,7 +56,9 @@ def __len__(self) -> int: ... ... return df.shape """ -IntoFrame: TypeAlias = Union["NativeFrame", "DataFrame[Any]", "LazyFrame[Any]"] +IntoLazyFrame: TypeAlias = "NativeLazyFrame" + +IntoFrame: TypeAlias = Union["IntoDataFrame", "IntoLazyFrame"] """Anything which can be converted to a Narwhals DataFrame or LazyFrame. Use this if your function can accept an object which can be converted to either @@ -78,7 +86,7 @@ def __len__(self) -> int: ... ... 
return df.columns """ -IntoSeries: TypeAlias = Union["Series[Any]", "NativeSeries"] +IntoSeries: TypeAlias = "NativeSeries" """Anything which can be converted to a Narwhals Series. Use this if your function can accept an object which can be converted to `nw.Series` From c102992ec7f8ea52e17e9f46fad01ee85af8f990 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 5 Aug 2025 18:53:18 +0000 Subject: [PATCH 06/19] fix(typing): avoid `[assignment]` --- tests/v1_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/v1_test.py b/tests/v1_test.py index ae0591e36a..7f7ebd0079 100644 --- a/tests/v1_test.py +++ b/tests/v1_test.py @@ -473,8 +473,8 @@ def test_renamed_taxicab_norm_dataframe() -> None: result = nw_v1.from_native(pa.table({"a": [1, 2, 3, -4, 5]}))._l1_norm() expected = {"a": [15]} assert_equal_data(result, expected) - result = nw_v1.from_native(pa.table({"a": [1, 2, 3, -4, 5]})).lazy()._l1_norm() - assert_equal_data(result, expected) + result_lazy = nw_v1.from_native(pa.table({"a": [1, 2, 3, -4, 5]})).lazy()._l1_norm() + assert_equal_data(result_lazy, expected) def test_renamed_taxicab_norm_dataframe_narwhalify() -> None: From c0e08a982a47cb90aa96d9b6197fdd00be2d3373 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 5 Aug 2025 18:56:28 +0000 Subject: [PATCH 07/19] fix(typing): Avoid confusing mypy --- tests/tpch_q1_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tpch_q1_test.py b/tests/tpch_q1_test.py index d8b9882a61..97a3fa0dcc 100644 --- a/tests/tpch_q1_test.py +++ b/tests/tpch_q1_test.py @@ -105,7 +105,7 @@ def test_q1_w_generic_funcs(library: str) -> None: if library == "pandas" and PANDAS_VERSION < (1, 5): pytest.skip() elif library == "pandas": - df_raw: IntoFrame = pd.read_csv("tests/data/lineitem.csv") + df_raw: pd.DataFrame | pl.DataFrame = pd.read_csv("tests/data/lineitem.csv") else: 
pytest.importorskip("polars") import polars as pl From 89095ceb6563e818e8f9c021bc76e730870145e0 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 5 Aug 2025 21:13:23 +0000 Subject: [PATCH 08/19] oooooh i think we're close --- narwhals/_compliant/dataframe.py | 17 ++++++++++------- narwhals/_compliant/typing.py | 2 ++ narwhals/_interchange/dataframe.py | 12 +++++++++--- narwhals/_sql/dataframe.py | 10 +++++++--- narwhals/_utils.py | 11 +++-------- narwhals/dataframe.py | 12 ++++++------ narwhals/stable/v1/__init__.py | 9 +++++---- narwhals/stable/v1/typing.py | 22 ++++++++++++++-------- narwhals/translate.py | 20 +++++++++++++------- narwhals/typing.py | 8 +++----- tests/translate/from_native_test.py | 2 +- 11 files changed, 73 insertions(+), 52 deletions(-) diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py index 2e8345d5f3..ee7c5f0f70 100644 --- a/narwhals/_compliant/dataframe.py +++ b/narwhals/_compliant/dataframe.py @@ -13,6 +13,7 @@ EagerSeriesT, NativeDataFrameT, NativeFrameT, + NativeLazyFrameT, NativeSeriesT, ) from narwhals._translate import ( @@ -261,12 +262,12 @@ def write_parquet(self, file: str | Path | BytesIO) -> None: ... class CompliantLazyFrame( - _StoresNative[NativeFrameT], - FromNative[NativeFrameT], + _StoresNative[NativeLazyFrameT], + FromNative[NativeLazyFrameT], ToNarwhals[ToNarwhalsT_co], - Protocol[CompliantExprT_contra, NativeFrameT, ToNarwhalsT_co], + Protocol[CompliantExprT_contra, NativeLazyFrameT, ToNarwhalsT_co], ): - _native_frame: NativeFrameT + _native_frame: NativeLazyFrameT _implementation: Implementation _version: Version @@ -274,7 +275,9 @@ def __narwhals_lazyframe__(self) -> Self: ... def __narwhals_namespace__(self) -> Any: ... @classmethod - def from_native(cls, data: NativeFrameT, /, *, context: _LimitedContext) -> Self: ... + def from_native( + cls, data: NativeLazyFrameT, /, *, context: _LimitedContext + ) -> Self: ... 
def simple_select(self, *column_names: str) -> Self: """`select` where all args are column names.""" @@ -290,7 +293,7 @@ def aggregate(self, *exprs: CompliantExprT_contra) -> Self: def _with_version(self, version: Version) -> Self: ... @property - def native(self) -> NativeFrameT: + def native(self) -> NativeLazyFrameT: return self._native_frame @property @@ -357,7 +360,7 @@ class EagerDataFrame( CompliantDataFrame[ EagerSeriesT, EagerExprT, NativeDataFrameT, "DataFrame[NativeDataFrameT]" ], - CompliantLazyFrame[EagerExprT, NativeDataFrameT, "DataFrame[NativeDataFrameT]"], + CompliantLazyFrame[EagerExprT, "Incomplete", "DataFrame[NativeDataFrameT]"], ValidateBackendVersion, Protocol[EagerSeriesT, EagerExprT, NativeDataFrameT, NativeSeriesT], ): diff --git a/narwhals/_compliant/typing.py b/narwhals/_compliant/typing.py index fd9bcc5546..dbd6f9f7a8 100644 --- a/narwhals/_compliant/typing.py +++ b/narwhals/_compliant/typing.py @@ -23,6 +23,7 @@ from narwhals._compliant.window import WindowInputs from narwhals.typing import ( FillNullStrategy, + IntoLazyFrame, NativeDataFrame, NativeFrame, NativeSeries, @@ -92,6 +93,7 @@ class ScalarKwargs(TypedDict, total=False): "NativeSeriesT_contra", bound="NativeSeries", contravariant=True ) NativeDataFrameT = TypeVar("NativeDataFrameT", bound="NativeDataFrame") +NativeLazyFrameT = TypeVar("NativeLazyFrameT", bound="IntoLazyFrame") NativeFrameT = TypeVar("NativeFrameT", bound="NativeFrame") NativeFrameT_co = TypeVar("NativeFrameT_co", bound="NativeFrame", covariant=True) NativeFrameT_contra = TypeVar( diff --git a/narwhals/_interchange/dataframe.py b/narwhals/_interchange/dataframe.py index a222c9094e..8995111ee1 100644 --- a/narwhals/_interchange/dataframe.py +++ b/narwhals/_interchange/dataframe.py @@ -1,18 +1,20 @@ from __future__ import annotations import enum -from typing import TYPE_CHECKING, Any, NoReturn +from typing import TYPE_CHECKING, Any, NoReturn, Protocol from narwhals._utils import Version, parse_version if 
TYPE_CHECKING: import pandas as pd import pyarrow as pa - from typing_extensions import Self + from typing_extensions import Self, TypeIs from narwhals._interchange.series import InterchangeSeries from narwhals.dtypes import DType - from narwhals.typing import DataFrameLike + + class DataFrameLike(Protocol): + def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... class DtypeKind(enum.IntEnum): @@ -153,3 +155,7 @@ def select(self, *exprs: str) -> Self: # pragma: no cover "at https://github.com/narwhals-dev/narwhals/issues." ) raise NotImplementedError(msg) + + +def _supports_dataframe_interchange(obj: Any) -> TypeIs[DataFrameLike]: + return hasattr(obj, "__dataframe__") diff --git a/narwhals/_sql/dataframe.py b/narwhals/_sql/dataframe.py index 540ed7d0d8..356a77373f 100644 --- a/narwhals/_sql/dataframe.py +++ b/narwhals/_sql/dataframe.py @@ -3,7 +3,11 @@ from typing import TYPE_CHECKING, Any, Protocol from narwhals._compliant.dataframe import CompliantLazyFrame -from narwhals._compliant.typing import CompliantExprT_contra, NativeExprT, NativeFrameT +from narwhals._compliant.typing import ( + CompliantExprT_contra, + NativeExprT, + NativeLazyFrameT, +) from narwhals._translate import ToNarwhalsT_co from narwhals._utils import check_columns_exist @@ -20,8 +24,8 @@ class SQLLazyFrame( - CompliantLazyFrame[CompliantExprT_contra, NativeFrameT, ToNarwhalsT_co], - Protocol[CompliantExprT_contra, NativeFrameT, ToNarwhalsT_co], + CompliantLazyFrame[CompliantExprT_contra, NativeLazyFrameT, ToNarwhalsT_co], + Protocol[CompliantExprT_contra, NativeLazyFrameT, ToNarwhalsT_co], ): def _evaluate_window_expr( self, diff --git a/narwhals/_utils.py b/narwhals/_utils.py index 64bd1bfb69..0602a7ecfd 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -72,7 +72,7 @@ NativeFrameT_co, NativeSeriesT_co, ) - from narwhals._compliant.typing import EvalNames + from narwhals._compliant.typing import EvalNames, NativeLazyFrameT from narwhals._namespace import 
EagerAllowedImplementation, Namespace from narwhals._translate import ArrowStreamExportable, IntoArrowTable, ToNarwhalsT_co from narwhals.dataframe import DataFrame, LazyFrame @@ -82,7 +82,6 @@ CompliantDataFrame, CompliantLazyFrame, CompliantSeries, - DataFrameLike, DTypes, IntoSeriesT, MultiIndexSelector, @@ -1627,8 +1626,8 @@ def is_compliant_dataframe( def is_compliant_lazyframe( - obj: CompliantLazyFrame[CompliantExprT, NativeFrameT_co, ToNarwhalsT_co] | Any, -) -> TypeIs[CompliantLazyFrame[CompliantExprT, NativeFrameT_co, ToNarwhalsT_co]]: + obj: CompliantLazyFrame[CompliantExprT, NativeLazyFrameT, ToNarwhalsT_co] | Any, +) -> TypeIs[CompliantLazyFrame[CompliantExprT, NativeLazyFrameT, ToNarwhalsT_co]]: return _hasattr_static(obj, "__narwhals_lazyframe__") @@ -1664,10 +1663,6 @@ def has_native_namespace(obj: Any) -> TypeIs[SupportsNativeNamespace]: return _hasattr_static(obj, "__native_namespace__") -def _supports_dataframe_interchange(obj: Any) -> TypeIs[DataFrameLike]: - return hasattr(obj, "__dataframe__") - - def supports_arrow_c_stream(obj: Any) -> TypeIs[ArrowStreamExportable]: return _hasattr_static(obj, "__arrow_c_stream__") diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 52ea9a4c72..ce780d23b5 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -74,6 +74,7 @@ IntoDataFrame, IntoExpr, IntoFrame, + IntoLazyFrame, JoinStrategy, LazyUniqueKeepStrategy, MultiColSelector as _MultiColSelector, @@ -89,7 +90,7 @@ PS = ParamSpec("PS") _FrameT = TypeVar("_FrameT", bound="IntoFrame") -FrameT = TypeVar("FrameT", bound="IntoFrame") +LazyFrameT = TypeVar("LazyFrameT", bound="IntoLazyFrame") DataFrameT = TypeVar("DataFrameT", bound="IntoDataFrame") R = TypeVar("R") @@ -463,8 +464,7 @@ def _lazyframe(self) -> type[LazyFrame[Any]]: def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None: self._level: Literal["full", "lazy", "interchange"] = level - # NOTE: Interchange support (`DataFrameLike`) is the source 
of the error - self._compliant_frame: CompliantDataFrame[Any, Any, DataFrameT, Self] # type: ignore[type-var] + self._compliant_frame: CompliantDataFrame[Any, Any, DataFrameT, Self] if is_compliant_dataframe(df): self._compliant_frame = df.__narwhals_dataframe__() else: # pragma: no cover @@ -2370,7 +2370,7 @@ def explode(self, columns: str | Sequence[str], *more_columns: str) -> Self: return super().explode(columns, *more_columns) -class LazyFrame(BaseFrame[FrameT]): +class LazyFrame(BaseFrame[LazyFrameT]): """Narwhals LazyFrame, backed by a native lazyframe. Warning: @@ -2436,7 +2436,7 @@ def _dataframe(self) -> type[DataFrame[Any]]: def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None: self._level = level - self._compliant_frame: CompliantLazyFrame[Any, FrameT, Self] # type: ignore[type-var] + self._compliant_frame: CompliantLazyFrame[Any, LazyFrameT, Self] if is_compliant_lazyframe(df): self._compliant_frame = df.__narwhals_lazyframe__() else: # pragma: no cover @@ -2545,7 +2545,7 @@ def collect( self._compliant_frame.collect(backend=eager_backend, **kwargs), level="full" ) - def to_native(self) -> FrameT: + def to_native(self) -> LazyFrameT: """Convert Narwhals LazyFrame to native one. 
Returns: diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 018c476ff3..4fdc46f644 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -59,8 +59,8 @@ UInt128, Unknown, ) +from narwhals.stable.v1.typing import IntoDataFrameT, IntoFrameT, IntoLazyFrameT from narwhals.translate import _from_native_impl, get_native_namespace, to_py_scalar -from narwhals.typing import IntoDataFrameT, IntoFrameT, IntoLazyFrameT if TYPE_CHECKING: from collections.abc import Iterable, Mapping, Sequence @@ -93,7 +93,8 @@ IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries", default=Any) -class DataFrame(NwDataFrame[IntoDataFrameT]): +# NOTE legit +class DataFrame(NwDataFrame[IntoDataFrameT]): # type: ignore[type-var] _version = Version.V1 @inherit_doc(NwDataFrame) @@ -453,7 +454,7 @@ def __init__( @overload -def _stableify(obj: NwDataFrame[IntoDataFrameT]) -> DataFrame[IntoDataFrameT]: ... +def _stableify(obj: NwDataFrame[IntoDataFrameT]) -> DataFrame[IntoDataFrameT]: ... # type: ignore[type-var] @overload def _stableify(obj: NwLazyFrame[IntoLazyFrameT]) -> LazyFrame[IntoLazyFrameT]: ... @overload @@ -463,7 +464,7 @@ def _stableify(obj: NwExpr) -> Expr: ... def _stableify( - obj: NwDataFrame[IntoDataFrameT] + obj: NwDataFrame[IntoDataFrameT] # type: ignore[type-var] | NwLazyFrame[IntoLazyFrameT] | NwSeries[IntoSeriesT] | NwExpr, diff --git a/narwhals/stable/v1/typing.py b/narwhals/stable/v1/typing.py index d698dae520..7581910607 100644 --- a/narwhals/stable/v1/typing.py +++ b/narwhals/stable/v1/typing.py @@ -4,7 +4,7 @@ if TYPE_CHECKING: import sys - from collections.abc import Sized + from collections.abc import Iterable, Sized from narwhals.stable.v1 import DataFrame, LazyFrame @@ -26,8 +26,11 @@ def join(self, *args: Any, **kwargs: Any) -> Any: ... class NativeDataFrame(Sized, NativeFrame, Protocol): ... - class NativeSeries(Protocol): - def __len__(self) -> int: ... 
+ class NativeLazyFrame(NativeFrame, Protocol): + def explain(self, *args: Any, **kwargs: Any) -> Any: ... + + class NativeSeries(Sized, Iterable[Any], Protocol): + def filter(self, *args: Any, **kwargs: Any) -> Any: ... class DataFrameLike(Protocol): def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... @@ -43,7 +46,8 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... `nw.Expr`, e.g. `df.select('a')`. """ -IntoDataFrame: TypeAlias = Union["NativeDataFrame", "DataFrame[Any]", "DataFrameLike"] + +IntoDataFrame: TypeAlias = Union["NativeDataFrame", "DataFrameLike"] """Anything which can be converted to a Narwhals DataFrame. Use this if your function accepts a narwhalifiable object but doesn't care about its backend. @@ -56,9 +60,9 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... ... return df.shape """ -IntoFrame: TypeAlias = Union[ - "NativeFrame", "DataFrame[Any]", "LazyFrame[Any]", "DataFrameLike" -] +IntoLazyFrame: TypeAlias = "NativeLazyFrame" + +IntoFrame: TypeAlias = Union["IntoDataFrame", "IntoLazyFrame"] """Anything which can be converted to a Narwhals DataFrame or LazyFrame. Use this if your function can accept an object which can be converted to either @@ -86,7 +90,7 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... ... return df.columns """ -IntoSeries: TypeAlias = Union["Series[Any]", "NativeSeries"] +IntoSeries: TypeAlias = "NativeSeries" """Anything which can be converted to a Narwhals Series. Use this if your function can accept an object which can be converted to `nw.Series` @@ -129,6 +133,8 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... ... return df.with_columns(c=df["a"] + 1).to_native() """ +IntoLazyFrameT = TypeVar("IntoLazyFrameT", bound="IntoLazyFrame") + FrameT = TypeVar("FrameT", "DataFrame[Any]", "LazyFrame[Any]") """TypeVar bound to Narwhals DataFrame or Narwhals LazyFrame. 
diff --git a/narwhals/translate.py b/narwhals/translate.py index 70767af6e0..ef1e3f5f1a 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -34,9 +34,10 @@ from narwhals.series import Series from narwhals.typing import ( DataFrameT, + IntoDataFrame, IntoDataFrameT, IntoFrame, - IntoFrameT, + IntoLazyFrame, IntoLazyFrameT, IntoSeries, IntoSeriesT, @@ -56,8 +57,8 @@ def to_native( ) -> IntoDataFrameT: ... @overload def to_native( - narwhals_object: LazyFrame[IntoFrameT], *, pass_through: Literal[False] = ... -) -> IntoFrameT: ... + narwhals_object: LazyFrame[IntoLazyFrameT], *, pass_through: Literal[False] = ... +) -> IntoLazyFrameT: ... @overload def to_native( narwhals_object: Series[IntoSeriesT], *, pass_through: Literal[False] = ... @@ -68,11 +69,11 @@ def to_native(narwhals_object: Any, *, pass_through: bool) -> Any: ... def to_native( narwhals_object: DataFrame[IntoDataFrameT] - | LazyFrame[IntoFrameT] + | LazyFrame[IntoLazyFrameT] | Series[IntoSeriesT], *, pass_through: bool = False, -) -> IntoDataFrameT | IntoFrameT | IntoSeriesT | Any: +) -> IntoDataFrameT | IntoLazyFrameT | IntoSeriesT | Any: """Convert Narwhals object to native one. 
Arguments: @@ -325,8 +326,8 @@ def _from_native_impl( # noqa: C901, PLR0911, PLR0912, PLR0915 allow_series: bool | None = None, version: Version, ) -> Any: + from narwhals._interchange.dataframe import _supports_dataframe_interchange from narwhals._utils import ( - _supports_dataframe_interchange, is_compliant_dataframe, is_compliant_lazyframe, is_compliant_series, @@ -575,7 +576,12 @@ def get_native_namespace( def _get_native_namespace_single_obj( - obj: DataFrame[Any] | LazyFrame[Any] | Series[Any] | IntoFrame | IntoSeries, + obj: DataFrame[Any] + | LazyFrame[Any] + | Series[Any] + | IntoDataFrame + | IntoLazyFrame + | IntoSeries, ) -> Any: if has_native_namespace(obj): return obj.__native_namespace__() diff --git a/narwhals/typing.py b/narwhals/typing.py index dcd43360ba..b9a6552149 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -14,6 +14,7 @@ from typing_extensions import TypeAlias from narwhals import dtypes + from narwhals._namespace import _NativeIbis from narwhals.dataframe import DataFrame, LazyFrame from narwhals.expr import Expr from narwhals.series import Series @@ -35,9 +36,6 @@ def explain(self, *args: Any, **kwargs: Any) -> Any: ... class NativeSeries(Sized, Iterable[Any], Protocol): def filter(self, *args: Any, **kwargs: Any) -> Any: ... - class DataFrameLike(Protocol): - def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... - class SupportsNativeNamespace(Protocol): def __native_namespace__(self) -> ModuleType: ... @@ -110,7 +108,7 @@ def Binary(self) -> type[dtypes.Binary]: ... which will be interpreted as a `nw.Expr`, e.g. `df.select('a')`. """ -IntoDataFrame: TypeAlias = Union["NativeDataFrame", "DataFrameLike"] +IntoDataFrame: TypeAlias = "NativeDataFrame" """Anything which can be converted to a Narwhals DataFrame. Use this if your function accepts a narwhalifiable object but doesn't care about its backend. @@ -123,7 +121,7 @@ def Binary(self) -> type[dtypes.Binary]: ... ... 
return df.shape """ -IntoLazyFrame: TypeAlias = "NativeLazyFrame" +IntoLazyFrame: TypeAlias = Union["NativeLazyFrame", "_NativeIbis"] IntoFrame: TypeAlias = Union["IntoDataFrame", "IntoLazyFrame"] """Anything which can be converted to a Narwhals DataFrame or LazyFrame. diff --git a/tests/translate/from_native_test.py b/tests/translate/from_native_test.py index 8a5c568190..c0676b6872 100644 --- a/tests/translate/from_native_test.py +++ b/tests/translate/from_native_test.py @@ -301,7 +301,7 @@ def __dataframe__(self) -> None: # pragma: no cover result = nw.from_native(mockdf, pass_through=True) assert result is mockdf with pytest.raises(TypeError): - nw.from_native(mockdf) + nw.from_native(mockdf) # type: ignore[call-overload] def test_from_native_strict_native_series() -> None: From 56102a918bf73fcca258c11aa3b2fa863de86f17 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 18 Aug 2025 11:43:42 +0000 Subject: [PATCH 09/19] fix: avoid introducing `__all__` extension oops `ruff` autofix bad --- narwhals/stable/v1/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index d090bd661c..eb20b58476 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -59,7 +59,7 @@ UInt128, Unknown, ) -from narwhals.stable.v1.typing import IntoDataFrameT, IntoFrameT, IntoLazyFrameT +from narwhals.stable.v1.typing import IntoDataFrameT, IntoLazyFrameT from narwhals.translate import _from_native_impl, get_native_namespace, to_py_scalar if TYPE_CHECKING: @@ -1375,7 +1375,6 @@ def scan_parquet( "Int32", "Int64", "Int128", - "IntoFrameT", "LazyFrame", "List", "Object", From 3e14692b0b2eaaa9f6d90d68fa5bcd0e97a17b47 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 18 Aug 2025 11:46:05 +0000 Subject: [PATCH 10/19] refactor: rename external import guard --- 
narwhals/_interchange/dataframe.py | 2 +- narwhals/translate.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/narwhals/_interchange/dataframe.py b/narwhals/_interchange/dataframe.py index 8995111ee1..c2115370dc 100644 --- a/narwhals/_interchange/dataframe.py +++ b/narwhals/_interchange/dataframe.py @@ -157,5 +157,5 @@ def select(self, *exprs: str) -> Self: # pragma: no cover raise NotImplementedError(msg) -def _supports_dataframe_interchange(obj: Any) -> TypeIs[DataFrameLike]: +def supports_dataframe_interchange(obj: Any) -> TypeIs[DataFrameLike]: return hasattr(obj, "__dataframe__") diff --git a/narwhals/translate.py b/narwhals/translate.py index ef1e3f5f1a..572961ccb7 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -326,7 +326,7 @@ def _from_native_impl( # noqa: C901, PLR0911, PLR0912, PLR0915 allow_series: bool | None = None, version: Version, ) -> Any: - from narwhals._interchange.dataframe import _supports_dataframe_interchange + from narwhals._interchange.dataframe import supports_dataframe_interchange from narwhals._utils import ( is_compliant_dataframe, is_compliant_lazyframe, @@ -511,7 +511,7 @@ def _from_native_impl( # noqa: C901, PLR0911, PLR0912, PLR0915 return ns_spark.compliant.from_native(native_object).to_narwhals() # Interchange protocol - if _supports_dataframe_interchange(native_object): + if supports_dataframe_interchange(native_object): from narwhals._interchange.dataframe import InterchangeFrame if eager_only or series_only: From eb7929a8b5e8f7f8948a9cb35a29df51ffd207b0 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 18 Aug 2025 11:48:43 +0000 Subject: [PATCH 11/19] refactor: Reuse `v1.typing.DataFrameLike` def --- narwhals/_interchange/dataframe.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/narwhals/_interchange/dataframe.py b/narwhals/_interchange/dataframe.py index c2115370dc..a07f1fad4c 100644 --- 
a/narwhals/_interchange/dataframe.py +++ b/narwhals/_interchange/dataframe.py @@ -1,7 +1,7 @@ from __future__ import annotations import enum -from typing import TYPE_CHECKING, Any, NoReturn, Protocol +from typing import TYPE_CHECKING, Any, NoReturn from narwhals._utils import Version, parse_version @@ -12,9 +12,7 @@ from narwhals._interchange.series import InterchangeSeries from narwhals.dtypes import DType - - class DataFrameLike(Protocol): - def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... + from narwhals.stable.v1.typing import DataFrameLike class DtypeKind(enum.IntEnum): From ea96b84540ef9f50467cf59cead716baeebc8690 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 18 Aug 2025 11:55:23 +0000 Subject: [PATCH 12/19] refactor(typing): Realign, simplify `get_native_namespace` --- narwhals/translate.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/narwhals/translate.py b/narwhals/translate.py index 572961ccb7..fdace19827 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -34,10 +34,9 @@ from narwhals.series import Series from narwhals.typing import ( DataFrameT, - IntoDataFrame, + Frame, IntoDataFrameT, IntoFrame, - IntoLazyFrame, IntoLazyFrameT, IntoSeries, IntoSeriesT, @@ -541,9 +540,7 @@ def _from_native_impl( # noqa: C901, PLR0911, PLR0912, PLR0915 return native_object -def get_native_namespace( - *obj: DataFrame[Any] | LazyFrame[Any] | Series[Any] | IntoFrame | IntoSeries, -) -> Any: +def get_native_namespace(*obj: Frame | Series[Any] | IntoFrame | IntoSeries) -> Any: """Get native namespace from object. 
Arguments: @@ -576,12 +573,7 @@ def get_native_namespace( def _get_native_namespace_single_obj( - obj: DataFrame[Any] - | LazyFrame[Any] - | Series[Any] - | IntoDataFrame - | IntoLazyFrame - | IntoSeries, + obj: Frame | Series[Any] | IntoFrame | IntoSeries, ) -> Any: if has_native_namespace(obj): return obj.__native_namespace__() From 33178dcc9c41671090d7d293bb3334ce69d79874 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 18 Aug 2025 12:09:41 +0000 Subject: [PATCH 13/19] test(typing): Ensure type ignore only on lazy branch --- tests/frame/with_row_index_test.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/tests/frame/with_row_index_test.py b/tests/frame/with_row_index_test.py index 79f43ceeab..c9d5f59fe0 100644 --- a/tests/frame/with_row_index_test.py +++ b/tests/frame/with_row_index_test.py @@ -1,6 +1,5 @@ from __future__ import annotations -from contextlib import nullcontext as does_not_raise from typing import TYPE_CHECKING import pytest @@ -49,16 +48,10 @@ def test_with_row_index_lazy( def test_with_row_index_lazy_exception(constructor: Constructor) -> None: frame = nw.from_native(constructor(data)) - msg = r"(LazyFrame\.)?with_row_index\(\) missing 1 required keyword-only argument: 'order_by'$" - context = ( - pytest.raises(TypeError, match=msg) - if isinstance(frame, nw.LazyFrame) - else does_not_raise() - ) - - with context: - result = frame.with_row_index() # type: ignore[call-arg] - - expected = {"index": [0, 1], **data} - assert_equal_data(result, expected) + if isinstance(frame, nw.LazyFrame): + with pytest.raises(TypeError, match=msg): + frame.with_row_index() # type: ignore[call-arg] + else: + result = frame.with_row_index() + assert_equal_data(result, {"index": [0, 1], **data}) From 7b329c5daf7acfc8d0e7d33bbb83acb47e841692 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 18 Aug 2025 12:20:16 +0000 
Subject: [PATCH 14/19] test(typing): Fix dask `union-attr` --- tests/expr_and_series/dt/datetime_attributes_test.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/expr_and_series/dt/datetime_attributes_test.py b/tests/expr_and_series/dt/datetime_attributes_test.py index 2fbe13149c..c7bf55e7c0 100644 --- a/tests/expr_and_series/dt/datetime_attributes_test.py +++ b/tests/expr_and_series/dt/datetime_attributes_test.py @@ -1,12 +1,16 @@ from __future__ import annotations from datetime import date, datetime +from typing import TYPE_CHECKING, cast import pytest import narwhals as nw from tests.utils import Constructor, ConstructorEager, assert_equal_data +if TYPE_CHECKING: + import dask.dataframe as dd + data = { "a": [datetime(2021, 3, 1, 12, 34, 56, 49000), datetime(2020, 1, 2, 2, 4, 14, 715000)] } @@ -119,7 +123,9 @@ def test_to_date(request: pytest.FixtureRequest, constructor: Constructor) -> No request.applymarker(pytest.mark.xfail) dates = {"a": [datetime(2001, 1, 1), None, datetime(2001, 1, 3)]} if "dask" in str(constructor): - df = nw.from_native(constructor(dates).astype({"a": "timestamp[ns][pyarrow]"})) # type: ignore[union-attr] + df_dask = cast("dd.DataFrame", constructor(dates)) + df_dask = cast("dd.DataFrame", df_dask.astype({"a": "timestamp[ns][pyarrow]"})) + df = nw.from_native(df_dask) else: df = nw.from_native(constructor(dates)) result = df.select(nw.col("a").dt.date()) From 9a7bf8f7f8f3e88bb2d604e2a1efabe90d83b493 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 18 Aug 2025 12:27:55 +0000 Subject: [PATCH 15/19] test(typing): repeat pandas assign cast fix --- tests/expr_and_series/struct_/field_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/expr_and_series/struct_/field_test.py b/tests/expr_and_series/struct_/field_test.py index fdfa8747ce..39d83247ee 100644 --- a/tests/expr_and_series/struct_/field_test.py +++ 
b/tests/expr_and_series/struct_/field_test.py @@ -20,7 +20,7 @@ def test_get_field_expr(request: pytest.FixtureRequest, constructor: Constructor df_native = constructor(data) if "pandas" in str(constructor): - df_native = df_native.assign( # type: ignore[union-attr] + df_native = cast("pd.DataFrame", df_native).assign( user=pd.Series( data["user"], dtype=pd.ArrowDtype( From dbe443222edb25059d7d06077c53b2ff616c44d0 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 20 Aug 2025 10:16:27 +0000 Subject: [PATCH 16/19] test: Call `.lazy()` before every join test https://github.com/narwhals-dev/narwhals/pull/2944#discussion_r2287577327 --- tests/frame/join_test.py | 295 +++++++++++++++++++++------------------ 1 file changed, 159 insertions(+), 136 deletions(-) diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index 5e202aa04a..5ed8b843e8 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -1,8 +1,13 @@ +"""Every join test needs to use `.lazy()` for typing. 
+ +See https://github.com/narwhals-dev/narwhals/pull/2944#discussion_r2286264815 +""" + from __future__ import annotations import re from datetime import datetime -from typing import Any, Literal +from typing import TYPE_CHECKING, Any, Literal import pandas as pd import pytest @@ -16,6 +21,9 @@ assert_equal_data, ) +if TYPE_CHECKING: + from narwhals.typing import JoinStrategy + @pytest.mark.parametrize( ("df1", "df2", "expected", "on", "left_on", "right_on"), @@ -93,14 +101,10 @@ def test_full_join( right_on: None | str | list[str], constructor: Constructor, ) -> None: - df_left = nw.from_native(constructor(df1)) - df_right = nw.from_native(constructor(df2)) + df_left = nw.from_native(constructor(df1)).lazy() + df_right = nw.from_native(constructor(df2)).lazy() result = df_left.join( - df_right, # type: ignore[arg-type] - on=on, - left_on=left_on, - right_on=right_on, - how="full", + df_right, on=on, left_on=left_on, right_on=right_on, how="full" ).sort("id", nulls_last=True) assert_equal_data(result, expected) @@ -136,15 +140,15 @@ def test_inner_join_two_keys(constructor: Constructor) -> None: "zor ro": [7.0, 8.0, 9.0], "idx": [0, 1, 2], } - df = nw.from_native(constructor(data)) + df = nw.from_native(constructor(data)).lazy() df_right = df result = df.join( - df_right, # type: ignore[arg-type] + df_right, left_on=["antananarivo", "bob"], right_on=["antananarivo", "bob"], how="inner", ) - result_on = df.join(df_right, on=["antananarivo", "bob"], how="inner") # type: ignore[arg-type] + result_on = df.join(df_right, on=["antananarivo", "bob"], how="inner") result = result.sort("idx").drop("idx_right") result_on = result_on.sort("idx").drop("idx_right") expected = { @@ -165,15 +169,12 @@ def test_inner_join_single_key(constructor: Constructor) -> None: "zor ro": [7.0, 8.0, 9.0], "idx": [0, 1, 2], } - df = nw.from_native(constructor(data)) + df = nw.from_native(constructor(data)).lazy() df_right = df result = df.join( - df_right, # type: ignore[arg-type] - 
left_on="antananarivo", - right_on="antananarivo", - how="inner", + df_right, left_on="antananarivo", right_on="antananarivo", how="inner" ).sort("idx") - result_on = df.join(df_right, on="antananarivo", how="inner").sort("idx") # type: ignore[arg-type] + result_on = df.join(df_right, on="antananarivo", how="inner").sort("idx") result = result.drop("idx_right") result_on = result_on.drop("idx_right") expected = { @@ -192,8 +193,8 @@ def test_cross_join(constructor: Constructor) -> None: if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 1, 4): pytest.skip() data = {"antananarivo": [1, 3, 2]} - df = nw.from_native(constructor(data)) - result = df.join(df, how="cross").sort("antananarivo", "antananarivo_right") # type: ignore[arg-type] + df = nw.from_native(constructor(data)).lazy() + result = df.join(df, how="cross").sort("antananarivo", "antananarivo_right") expected = { "antananarivo": [1, 1, 1, 2, 2, 2, 3, 3, 3], "antananarivo_right": [1, 2, 3, 1, 2, 3, 1, 2, 3], @@ -203,7 +204,7 @@ def test_cross_join(constructor: Constructor) -> None: with pytest.raises( ValueError, match="Can not pass `left_on`, `right_on` or `on` keys for cross join" ): - df.join(df, how="cross", left_on="antananarivo") # type: ignore[arg-type] + df.join(df, how="cross", left_on="antananarivo") @pytest.mark.parametrize("how", ["inner", "left"]) @@ -212,10 +213,10 @@ def test_suffix( constructor: Constructor, how: Literal["inner", "left"], suffix: str ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)) + df = nw.from_native(constructor(data)).lazy() df_right = df result = df.join( - df_right, # type: ignore[arg-type] + df_right, left_on=["antananarivo", "bob"], right_on=["antananarivo", "bob"], how=how, @@ -230,8 +231,8 @@ def test_cross_join_suffix(constructor: Constructor, suffix: str) -> None: if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 1, 4): pytest.skip() data = {"antananarivo": [1, 3, 2]} - 
df = nw.from_native(constructor(data)) - result = df.join(df, how="cross", suffix=suffix).sort( # type: ignore[arg-type] + df = nw.from_native(constructor(data)).lazy() + result = df.join(df, how="cross", suffix=suffix).sort( "antananarivo", f"antananarivo{suffix}" ) expected = { @@ -280,9 +281,9 @@ def test_anti_join( expected: dict[str, list[Any]], ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)) + df = nw.from_native(constructor(data)).lazy() other = df.filter(filter_expr) - result = df.join(other, how="anti", left_on=join_key, right_on=join_key) # type: ignore[arg-type] + result = df.join(other, how="anti", left_on=join_key, right_on=join_key) assert_equal_data(result, expected) @@ -318,9 +319,9 @@ def test_semi_join( expected: dict[str, list[Any]], ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)) + df = nw.from_native(constructor(data)).lazy() other = df.filter(filter_expr) - result = df.join(other, how="semi", left_on=join_key, right_on=join_key).sort( # type: ignore[arg-type] + result = df.join(other, how="semi", left_on=join_key, right_on=join_key).sort( "antananarivo" ) assert_equal_data(result, expected) @@ -329,7 +330,7 @@ def test_semi_join( @pytest.mark.parametrize("how", ["right"]) def test_join_not_implemented(constructor: Constructor, how: str) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)) + df = nw.from_native(constructor(data)).lazy() with pytest.raises( NotImplementedError, @@ -337,7 +338,12 @@ def test_join_not_implemented(constructor: Constructor, how: str) -> None: f"Only the following join strategies are supported: ('inner', 'left', 'full', 'cross', 'anti', 'semi'); found '{how}'." 
), ): - df.join(df, left_on="antananarivo", right_on="antananarivo", how=how) # type: ignore[arg-type] + df.join( + df, + left_on="antananarivo", + right_on="antananarivo", + how=how, # type: ignore[arg-type] + ) def test_left_join(constructor: Constructor) -> None: @@ -351,9 +357,9 @@ def test_left_join(constructor: Constructor) -> None: "co": [4.0, 5.0, 7.0], "idx": [0.0, 1.0, 2.0], } - df_left = nw.from_native(constructor(data_left)) - df_right = nw.from_native(constructor(data_right)) - result = df_left.join(df_right, left_on="bob", right_on="co", how="left") # type: ignore[arg-type] + df_left = nw.from_native(constructor(data_left)).lazy() + df_right = nw.from_native(constructor(data_right)).lazy() + result = df_left.join(df_right, left_on="bob", right_on="co", how="left") result = result.sort("idx") result = result.drop("idx_right") expected = { @@ -362,7 +368,7 @@ def test_left_join(constructor: Constructor) -> None: "idx": [0, 1, 2], "antananarivo_right": [1, 2, None], } - result_on_list = df_left.join(df_right, on=["antananarivo", "idx"], how="left") # type: ignore[arg-type] + result_on_list = df_left.join(df_right, on=["antananarivo", "idx"], how="left") result_on_list = result_on_list.sort("idx") expected_on_list = { "antananarivo": [1, 2, 3], @@ -377,10 +383,10 @@ def test_left_join(constructor: Constructor) -> None: def test_left_join_multiple_column(constructor: Constructor) -> None: data_left = {"antananarivo": [1, 2, 3], "bob": [4, 5, 6], "idx": [0, 1, 2]} data_right = {"antananarivo": [1, 2, 3], "c": [4, 5, 6], "idx": [0, 1, 2]} - df_left = nw.from_native(constructor(data_left)) - df_right = nw.from_native(constructor(data_right)) + df_left = nw.from_native(constructor(data_left)).lazy() + df_right = nw.from_native(constructor(data_right)).lazy() result = df_left.join( - df_right, # type: ignore[arg-type] + df_right, left_on=["antananarivo", "bob"], right_on=["antananarivo", "c"], how="left", @@ -404,9 +410,9 @@ def 
test_left_join_overlapping_column(constructor: Constructor) -> None: "d": [1.0, 4.0, 2.0], "idx": [0.0, 1.0, 2.0], } - df_left = nw.from_native(constructor(data_left)) - df_right = nw.from_native(constructor(data_right)) - result = df_left.join(df_right, left_on="bob", right_on="c", how="left").sort("idx") # type: ignore[arg-type] + df_left = nw.from_native(constructor(data_left)).lazy() + df_right = nw.from_native(constructor(data_right)).lazy() + result = df_left.join(df_right, left_on="bob", right_on="c", how="left").sort("idx") result = result.drop("idx_right") expected: dict[str, list[Any]] = { "antananarivo": [1, 2, 3], @@ -417,7 +423,7 @@ def test_left_join_overlapping_column(constructor: Constructor) -> None: "d_right": [1, 4, 2], } assert_equal_data(result, expected) - result = df_left.join(df_right, left_on="antananarivo", right_on="d", how="left") # type: ignore[arg-type] + result = df_left.join(df_right, left_on="antananarivo", right_on="d", how="left") result = result.sort("idx") result = result.drop("idx_right") expected = { @@ -432,35 +438,35 @@ def test_left_join_overlapping_column(constructor: Constructor) -> None: @pytest.mark.parametrize("how", ["inner", "left", "semi", "anti"]) -def test_join_keys_exceptions(constructor: Constructor, how: str) -> None: +def test_join_keys_exceptions(constructor: Constructor, how: JoinStrategy) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)) + df = nw.from_native(constructor(data)).lazy() with pytest.raises( ValueError, match=rf"Either \(`left_on` and `right_on`\) or `on` keys should be specified for {how}.", ): - df.join(df, how=how) # type: ignore[arg-type] + df.join(df, how=how) with pytest.raises( ValueError, match=rf"Either \(`left_on` and `right_on`\) or `on` keys should be specified for {how}.", ): - df.join(df, how=how, left_on="antananarivo") # type: ignore[arg-type] + df.join(df, how=how, left_on="antananarivo") with 
pytest.raises( ValueError, match=rf"Either \(`left_on` and `right_on`\) or `on` keys should be specified for {how}.", ): - df.join(df, how=how, right_on="antananarivo") # type: ignore[arg-type] + df.join(df, how=how, right_on="antananarivo") with pytest.raises( ValueError, match=f"If `on` is specified, `left_on` and `right_on` should be None for {how}.", ): - df.join(df, how=how, on="antananarivo", right_on="antananarivo") # type: ignore[arg-type] + df.join(df, how=how, on="antananarivo", right_on="antananarivo") with pytest.raises( ValueError, match="`left_on` and `right_on` must have the same length." ): - df.join(df, how=how, left_on=["antananarivo", "bob"], right_on="antananarivo") # type: ignore[arg-type] + df.join(df, how=how, left_on=["antananarivo", "bob"], right_on="antananarivo") @pytest.mark.parametrize( @@ -500,19 +506,22 @@ def test_joinasof_numeric( ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) - df = nw.from_native( - constructor({"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]}) - ).sort("antananarivo") - df_right = nw.from_native( - constructor({"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) - ).sort("antananarivo") + df = ( + nw.from_native(constructor({"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]})) + .lazy() + .sort("antananarivo") + ) + df_right = ( + nw.from_native( + constructor({"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) + ) + .lazy() + .sort("antananarivo") + ) result = df.join_asof( - df_right, # type: ignore[arg-type] - left_on="antananarivo", - right_on="antananarivo", - strategy=strategy, + df_right, left_on="antananarivo", right_on="antananarivo", strategy=strategy ) - result_on = df.join_asof(df_right, on="antananarivo", strategy=strategy) # type: ignore[arg-type] + result_on = df.join_asof(df_right, on="antananarivo", strategy=strategy) assert_equal_data(result.sort(by="antananarivo"), expected) 
assert_equal_data(result_on.sort(by="antananarivo"), expected) @@ -572,39 +581,44 @@ def test_joinasof_time( request.applymarker(pytest.mark.xfail) if PANDAS_VERSION < (2, 1) and ("pandas_pyarrow" in str(constructor)): request.applymarker(pytest.mark.xfail) - df = nw.from_native( - constructor( - { - "datetime": [ - datetime(2016, 3, 1), - datetime(2018, 8, 1), - datetime(2019, 1, 1), - ], - "population": [82.19, 82.66, 83.12], - } + df = ( + nw.from_native( + constructor( + { + "datetime": [ + datetime(2016, 3, 1), + datetime(2018, 8, 1), + datetime(2019, 1, 1), + ], + "population": [82.19, 82.66, 83.12], + } + ) ) - ).sort("datetime") - df_right = nw.from_native( - constructor( - { - "datetime": [ - datetime(2016, 1, 1), - datetime(2017, 1, 1), - datetime(2018, 1, 1), - datetime(2019, 1, 1), - datetime(2020, 1, 1), - ], - "gdp": [4164, 4411, 4566, 4696, 4827], - } + .lazy() + .sort("datetime") + ) + df_right = ( + nw.from_native( + constructor( + { + "datetime": [ + datetime(2016, 1, 1), + datetime(2017, 1, 1), + datetime(2018, 1, 1), + datetime(2019, 1, 1), + datetime(2020, 1, 1), + ], + "gdp": [4164, 4411, 4566, 4696, 4827], + } + ) ) - ).sort("datetime") + .lazy() + .sort("datetime") + ) result = df.join_asof( - df_right, # type: ignore[arg-type] - left_on="datetime", - right_on="datetime", - strategy=strategy, + df_right, left_on="datetime", right_on="datetime", strategy=strategy ) - result_on = df.join_asof(df_right, on="datetime", strategy=strategy) # type: ignore[arg-type] + result_on = df.join_asof(df_right, on="datetime", strategy=strategy) assert_equal_data(result.sort(by="datetime"), expected) assert_equal_data(result_on.sort(by="datetime"), expected) @@ -616,22 +630,34 @@ def test_joinasof_by(constructor: Constructor, request: pytest.FixtureRequest) - ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) - df = nw.from_native( - constructor( - { - "antananarivo": [1, 5, 7, 10], - 
"bob": ["D", "D", "C", "A"], - "c": [9, 2, 1, 1], - } + df = ( + nw.from_native( + constructor( + { + "antananarivo": [1, 5, 7, 10], + "bob": ["D", "D", "C", "A"], + "c": [9, 2, 1, 1], + } + ) ) - ).sort("antananarivo") - df_right = nw.from_native( - constructor( - {"antananarivo": [1, 4, 5, 8], "bob": ["D", "D", "A", "F"], "d": [1, 3, 4, 1]} + .lazy() + .sort("antananarivo") + ) + df_right = ( + nw.from_native( + constructor( + { + "antananarivo": [1, 4, 5, 8], + "bob": ["D", "D", "A", "F"], + "d": [1, 3, 4, 1], + } + ) ) - ).sort("antananarivo") - result = df.join_asof(df_right, on="antananarivo", by_left="bob", by_right="bob") # type: ignore[arg-type] - result_by = df.join_asof(df_right, on="antananarivo", by="bob") # type: ignore[arg-type] + .lazy() + .sort("antananarivo") + ) + result = df.join_asof(df_right, on="antananarivo", by_left="bob", by_right="bob") + result_by = df.join_asof(df_right, on="antananarivo", by="bob") expected = { "antananarivo": [1, 5, 7, 10], "bob": ["D", "D", "C", "A"], @@ -651,17 +677,20 @@ def test_joinasof_suffix( ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) - df = nw.from_native( - constructor({"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]}) - ).sort("antananarivo") - df_right = nw.from_native( - constructor({"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) - ).sort("antananarivo") + df = ( + nw.from_native(constructor({"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]})) + .lazy() + .sort("antananarivo") + ) + df_right = ( + nw.from_native( + constructor({"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) + ) + .lazy() + .sort("antananarivo") + ) result = df.join_asof( - df_right, # type: ignore[arg-type] - left_on="antananarivo", - right_on="antananarivo", - suffix="_y", + df_right, left_on="antananarivo", right_on="antananarivo", suffix="_y" ) expected = {"antananarivo": [1, 5, 10], "val": ["a", "b", "c"], "val_y": [1, 3, 7]} 
assert_equal_data(result.sort(by="antananarivo"), expected) @@ -672,93 +701,87 @@ def test_joinasof_not_implemented( constructor: Constructor, strategy: Literal["backward", "forward"] ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)) + df = nw.from_native(constructor(data)).lazy() with pytest.raises( NotImplementedError, match=rf"Only the following strategies are supported: \('backward', 'forward', 'nearest'\); found '{strategy}'.", ): df.join_asof( - df, # type: ignore[arg-type] - left_on="antananarivo", - right_on="antananarivo", - strategy=strategy, + df, left_on="antananarivo", right_on="antananarivo", strategy=strategy ) def test_joinasof_keys_exceptions(constructor: Constructor) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)) + df = nw.from_native(constructor(data)).lazy() with pytest.raises( ValueError, match=r"Either \(`left_on` and `right_on`\) or `on` keys should be specified.", ): - df.join_asof(df, left_on="antananarivo") # type: ignore[arg-type] + df.join_asof(df, left_on="antananarivo") with pytest.raises( ValueError, match=r"Either \(`left_on` and `right_on`\) or `on` keys should be specified.", ): - df.join_asof(df, right_on="antananarivo") # type: ignore[arg-type] + df.join_asof(df, right_on="antananarivo") with pytest.raises( ValueError, match=r"Either \(`left_on` and `right_on`\) or `on` keys should be specified.", ): - df.join_asof(df) # type: ignore[arg-type] + df.join_asof(df) with pytest.raises( ValueError, match="If `on` is specified, `left_on` and `right_on` should be None." ): df.join_asof( - df, # type: ignore[arg-type] - left_on="antananarivo", - right_on="antananarivo", - on="antananarivo", + df, left_on="antananarivo", right_on="antananarivo", on="antananarivo" ) with pytest.raises( ValueError, match="If `on` is specified, `left_on` and `right_on` should be None." 
): - df.join_asof(df, left_on="antananarivo", on="antananarivo") # type: ignore[arg-type] + df.join_asof(df, left_on="antananarivo", on="antananarivo") with pytest.raises( ValueError, match="If `on` is specified, `left_on` and `right_on` should be None." ): - df.join_asof(df, right_on="antananarivo", on="antananarivo") # type: ignore[arg-type] + df.join_asof(df, right_on="antananarivo", on="antananarivo") def test_joinasof_by_exceptions(constructor: Constructor) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)) + df = nw.from_native(constructor(data)).lazy() with pytest.raises( ValueError, match="If `by` is specified, `by_left` and `by_right` should be None." ): - df.join_asof(df, on="antananarivo", by_left="bob", by_right="bob", by="bob") # type: ignore[arg-type] + df.join_asof(df, on="antananarivo", by_left="bob", by_right="bob", by="bob") with pytest.raises( ValueError, match="Can not specify only `by_left` or `by_right`, you need to specify both.", ): - df.join_asof(df, on="antananarivo", by_left="bob") # type: ignore[arg-type] + df.join_asof(df, on="antananarivo", by_left="bob") with pytest.raises( ValueError, match="Can not specify only `by_left` or `by_right`, you need to specify both.", ): - df.join_asof(df, on="antananarivo", by_right="bob") # type: ignore[arg-type] + df.join_asof(df, on="antananarivo", by_right="bob") with pytest.raises( ValueError, match="If `by` is specified, `by_left` and `by_right` should be None." ): - df.join_asof(df, on="antananarivo", by_left="bob", by="bob") # type: ignore[arg-type] + df.join_asof(df, on="antananarivo", by_left="bob", by="bob") with pytest.raises( ValueError, match="If `by` is specified, `by_left` and `by_right` should be None." 
): - df.join_asof(df, on="antananarivo", by_right="bob", by="bob") # type: ignore[arg-type] + df.join_asof(df, on="antananarivo", by_right="bob", by="bob") with pytest.raises( ValueError, match="`by_left` and `by_right` must have the same length." ): df.join_asof( - df, # type: ignore[arg-type] + df, on="antananarivo", by_left=["antananarivo", "bob"], by_right=["antananarivo"], @@ -801,6 +824,6 @@ def test_join_duplicate_column_names( else: exception = nw.exceptions.DuplicateError df = constructor({"a": [1, 2, 3, 4, 5], "b": [6, 6, 6, 6, 6]}) - dfn = nw.from_native(df) + dfn = nw.from_native(df).lazy() with pytest.raises(exception): - dfn.join(dfn, on=["a"]).join(dfn, on=["a"]).lazy().collect() # type: ignore[arg-type] + dfn.join(dfn, on=["a"]).join(dfn, on=["a"]).lazy().collect() From 3c7e1cc6cf3b299b62b78ecb63e86fbe3a9e9d1d Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 20 Aug 2025 10:30:41 +0000 Subject: [PATCH 17/19] test: Move into a documented helper --- tests/frame/join_test.py | 195 +++++++++++++++++---------------------- 1 file changed, 85 insertions(+), 110 deletions(-) diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index 5ed8b843e8..8beb959fea 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -1,8 +1,3 @@ -"""Every join test needs to use `.lazy()` for typing. - -See https://github.com/narwhals-dev/narwhals/pull/2944#discussion_r2286264815 -""" - from __future__ import annotations import re @@ -22,7 +17,19 @@ ) if TYPE_CHECKING: - from narwhals.typing import JoinStrategy + from narwhals.typing import IntoLazyFrameT, JoinStrategy, NativeDataFrame + + +def from_native_lazy( + native: IntoLazyFrameT | NativeDataFrame, +) -> nw.LazyFrame[IntoLazyFrameT] | nw.LazyFrame[Any]: + """Every join test [needs to use `.lazy()` for typing]*. + + *Unless both left/right frames are of the same concrete type. 
+ + [needs to use `.lazy()` for typing]: https://github.com/narwhals-dev/narwhals/pull/2944#discussion_r2286264815 + """ + return nw.from_native(native).lazy() @pytest.mark.parametrize( @@ -101,8 +108,8 @@ def test_full_join( right_on: None | str | list[str], constructor: Constructor, ) -> None: - df_left = nw.from_native(constructor(df1)).lazy() - df_right = nw.from_native(constructor(df2)).lazy() + df_left = from_native_lazy(constructor(df1)) + df_right = from_native_lazy(constructor(df2)) result = df_left.join( df_right, on=on, left_on=left_on, right_on=right_on, how="full" ).sort("id", nulls_last=True) @@ -117,8 +124,8 @@ def test_full_join_duplicate( df1 = {"foo": [1, 2, 3], "val1": [1, 2, 3]} df2 = {"foo": [1, 2, 3], "foo_right": [1, 2, 3]} - df_left = nw.from_native(constructor(df1)).lazy() - df_right = nw.from_native(constructor(df2)).lazy() + df_left = from_native_lazy(constructor(df1)) + df_right = from_native_lazy(constructor(df2)) exceptions: list[type[Exception]] = [nw.exceptions.NarwhalsError] if "pyspark" in str(constructor) and "sqlframe" not in str(constructor): @@ -140,7 +147,7 @@ def test_inner_join_two_keys(constructor: Constructor) -> None: "zor ro": [7.0, 8.0, 9.0], "idx": [0, 1, 2], } - df = nw.from_native(constructor(data)).lazy() + df = from_native_lazy(constructor(data)) df_right = df result = df.join( df_right, @@ -169,7 +176,7 @@ def test_inner_join_single_key(constructor: Constructor) -> None: "zor ro": [7.0, 8.0, 9.0], "idx": [0, 1, 2], } - df = nw.from_native(constructor(data)).lazy() + df = from_native_lazy(constructor(data)) df_right = df result = df.join( df_right, left_on="antananarivo", right_on="antananarivo", how="inner" @@ -193,7 +200,7 @@ def test_cross_join(constructor: Constructor) -> None: if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 1, 4): pytest.skip() data = {"antananarivo": [1, 3, 2]} - df = nw.from_native(constructor(data)).lazy() + df = from_native_lazy(constructor(data)) result = df.join(df, 
how="cross").sort("antananarivo", "antananarivo_right") expected = { "antananarivo": [1, 1, 1, 2, 2, 2, 3, 3, 3], @@ -213,7 +220,7 @@ def test_suffix( constructor: Constructor, how: Literal["inner", "left"], suffix: str ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)).lazy() + df = from_native_lazy(constructor(data)) df_right = df result = df.join( df_right, @@ -231,7 +238,7 @@ def test_cross_join_suffix(constructor: Constructor, suffix: str) -> None: if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 1, 4): pytest.skip() data = {"antananarivo": [1, 3, 2]} - df = nw.from_native(constructor(data)).lazy() + df = from_native_lazy(constructor(data)) result = df.join(df, how="cross", suffix=suffix).sort( "antananarivo", f"antananarivo{suffix}" ) @@ -281,7 +288,7 @@ def test_anti_join( expected: dict[str, list[Any]], ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)).lazy() + df = from_native_lazy(constructor(data)) other = df.filter(filter_expr) result = df.join(other, how="anti", left_on=join_key, right_on=join_key) assert_equal_data(result, expected) @@ -319,7 +326,7 @@ def test_semi_join( expected: dict[str, list[Any]], ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)).lazy() + df = from_native_lazy(constructor(data)) other = df.filter(filter_expr) result = df.join(other, how="semi", left_on=join_key, right_on=join_key).sort( "antananarivo" @@ -330,7 +337,7 @@ def test_semi_join( @pytest.mark.parametrize("how", ["right"]) def test_join_not_implemented(constructor: Constructor, how: str) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)).lazy() + df = from_native_lazy(constructor(data)) with pytest.raises( NotImplementedError, @@ -357,8 +364,8 
@@ def test_left_join(constructor: Constructor) -> None: "co": [4.0, 5.0, 7.0], "idx": [0.0, 1.0, 2.0], } - df_left = nw.from_native(constructor(data_left)).lazy() - df_right = nw.from_native(constructor(data_right)).lazy() + df_left = from_native_lazy(constructor(data_left)) + df_right = from_native_lazy(constructor(data_right)) result = df_left.join(df_right, left_on="bob", right_on="co", how="left") result = result.sort("idx") result = result.drop("idx_right") @@ -383,8 +390,8 @@ def test_left_join(constructor: Constructor) -> None: def test_left_join_multiple_column(constructor: Constructor) -> None: data_left = {"antananarivo": [1, 2, 3], "bob": [4, 5, 6], "idx": [0, 1, 2]} data_right = {"antananarivo": [1, 2, 3], "c": [4, 5, 6], "idx": [0, 1, 2]} - df_left = nw.from_native(constructor(data_left)).lazy() - df_right = nw.from_native(constructor(data_right)).lazy() + df_left = from_native_lazy(constructor(data_left)) + df_right = from_native_lazy(constructor(data_right)) result = df_left.join( df_right, left_on=["antananarivo", "bob"], @@ -410,8 +417,8 @@ def test_left_join_overlapping_column(constructor: Constructor) -> None: "d": [1.0, 4.0, 2.0], "idx": [0.0, 1.0, 2.0], } - df_left = nw.from_native(constructor(data_left)).lazy() - df_right = nw.from_native(constructor(data_right)).lazy() + df_left = from_native_lazy(constructor(data_left)) + df_right = from_native_lazy(constructor(data_right)) result = df_left.join(df_right, left_on="bob", right_on="c", how="left").sort("idx") result = result.drop("idx_right") expected: dict[str, list[Any]] = { @@ -440,7 +447,7 @@ def test_left_join_overlapping_column(constructor: Constructor) -> None: @pytest.mark.parametrize("how", ["inner", "left", "semi", "anti"]) def test_join_keys_exceptions(constructor: Constructor, how: JoinStrategy) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)).lazy() + df = from_native_lazy(constructor(data)) with 
pytest.raises( ValueError, @@ -506,18 +513,12 @@ def test_joinasof_numeric( ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) - df = ( - nw.from_native(constructor({"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]})) - .lazy() - .sort("antananarivo") - ) - df_right = ( - nw.from_native( - constructor({"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) - ) - .lazy() - .sort("antananarivo") - ) + df = from_native_lazy( + constructor({"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]}) + ).sort("antananarivo") + df_right = from_native_lazy( + constructor({"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) + ).sort("antananarivo") result = df.join_asof( df_right, left_on="antananarivo", right_on="antananarivo", strategy=strategy ) @@ -581,40 +582,32 @@ def test_joinasof_time( request.applymarker(pytest.mark.xfail) if PANDAS_VERSION < (2, 1) and ("pandas_pyarrow" in str(constructor)): request.applymarker(pytest.mark.xfail) - df = ( - nw.from_native( - constructor( - { - "datetime": [ - datetime(2016, 3, 1), - datetime(2018, 8, 1), - datetime(2019, 1, 1), - ], - "population": [82.19, 82.66, 83.12], - } - ) + df = from_native_lazy( + constructor( + { + "datetime": [ + datetime(2016, 3, 1), + datetime(2018, 8, 1), + datetime(2019, 1, 1), + ], + "population": [82.19, 82.66, 83.12], + } ) - .lazy() - .sort("datetime") - ) - df_right = ( - nw.from_native( - constructor( - { - "datetime": [ - datetime(2016, 1, 1), - datetime(2017, 1, 1), - datetime(2018, 1, 1), - datetime(2019, 1, 1), - datetime(2020, 1, 1), - ], - "gdp": [4164, 4411, 4566, 4696, 4827], - } - ) + ).sort("datetime") + df_right = from_native_lazy( + constructor( + { + "datetime": [ + datetime(2016, 1, 1), + datetime(2017, 1, 1), + datetime(2018, 1, 1), + datetime(2019, 1, 1), + datetime(2020, 1, 1), + ], + "gdp": [4164, 4411, 4566, 4696, 4827], + } ) - .lazy() - .sort("datetime") - ) + ).sort("datetime") result = 
df.join_asof( df_right, left_on="datetime", right_on="datetime", strategy=strategy ) @@ -630,32 +623,20 @@ def test_joinasof_by(constructor: Constructor, request: pytest.FixtureRequest) - ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) - df = ( - nw.from_native( - constructor( - { - "antananarivo": [1, 5, 7, 10], - "bob": ["D", "D", "C", "A"], - "c": [9, 2, 1, 1], - } - ) + df = from_native_lazy( + constructor( + { + "antananarivo": [1, 5, 7, 10], + "bob": ["D", "D", "C", "A"], + "c": [9, 2, 1, 1], + } ) - .lazy() - .sort("antananarivo") - ) - df_right = ( - nw.from_native( - constructor( - { - "antananarivo": [1, 4, 5, 8], - "bob": ["D", "D", "A", "F"], - "d": [1, 3, 4, 1], - } - ) + ).sort("antananarivo") + df_right = from_native_lazy( + constructor( + {"antananarivo": [1, 4, 5, 8], "bob": ["D", "D", "A", "F"], "d": [1, 3, 4, 1]} ) - .lazy() - .sort("antananarivo") - ) + ).sort("antananarivo") result = df.join_asof(df_right, on="antananarivo", by_left="bob", by_right="bob") result_by = df.join_asof(df_right, on="antananarivo", by="bob") expected = { @@ -677,18 +658,12 @@ def test_joinasof_suffix( ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) - df = ( - nw.from_native(constructor({"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]})) - .lazy() - .sort("antananarivo") - ) - df_right = ( - nw.from_native( - constructor({"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) - ) - .lazy() - .sort("antananarivo") - ) + df = from_native_lazy( + constructor({"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]}) + ).sort("antananarivo") + df_right = from_native_lazy( + constructor({"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) + ).sort("antananarivo") result = df.join_asof( df_right, left_on="antananarivo", right_on="antananarivo", suffix="_y" ) @@ -701,7 +676,7 @@ def test_joinasof_not_implemented( 
constructor: Constructor, strategy: Literal["backward", "forward"] ) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)).lazy() + df = from_native_lazy(constructor(data)) with pytest.raises( NotImplementedError, @@ -714,7 +689,7 @@ def test_joinasof_not_implemented( def test_joinasof_keys_exceptions(constructor: Constructor) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)).lazy() + df = from_native_lazy(constructor(data)) with pytest.raises( ValueError, @@ -749,7 +724,7 @@ def test_joinasof_keys_exceptions(constructor: Constructor) -> None: def test_joinasof_by_exceptions(constructor: Constructor) -> None: data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = nw.from_native(constructor(data)).lazy() + df = from_native_lazy(constructor(data)) with pytest.raises( ValueError, match="If `by` is specified, `by_left` and `by_right` should be None." 
): @@ -824,6 +799,6 @@ def test_join_duplicate_column_names( else: exception = nw.exceptions.DuplicateError df = constructor({"a": [1, 2, 3, 4, 5], "b": [6, 6, 6, 6, 6]}) - dfn = nw.from_native(df).lazy() + dfn = from_native_lazy(df) with pytest.raises(exception): - dfn.join(dfn, on=["a"]).join(dfn, on=["a"]).lazy().collect() + dfn.join(dfn, on=["a"]).join(dfn, on=["a"]).collect() From 4eadd63e5e011d2533b2c579a1acb587693e1191 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 21 Aug 2025 08:30:05 +0000 Subject: [PATCH 18/19] test: `PolarsDataFrame.join` coverage https://github.com/narwhals-dev/narwhals/pull/2944#discussion_r2288927287 --- tests/frame/join_test.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index 8beb959fea..86d0aae4b8 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -798,7 +798,11 @@ def test_join_duplicate_column_names( request.applymarker(pytest.mark.xfail) else: exception = nw.exceptions.DuplicateError - df = constructor({"a": [1, 2, 3, 4, 5], "b": [6, 6, 6, 6, 6]}) - dfn = from_native_lazy(df) - with pytest.raises(exception): - dfn.join(dfn, on=["a"]).join(dfn, on=["a"]).collect() + data = {"a": [1, 2, 3, 4, 5], "b": [6, 6, 6, 6, 6]} + df = nw.from_native(constructor(data)) + if isinstance(df, nw.LazyFrame): + with pytest.raises(exception): + df.join(df, on=["a"]).join(df, on=["a"]).collect() + else: + with pytest.raises(exception): + df.join(df, on=["a"]).join(df, on=["a"]) From d3b8e13fe5951d0e12ed1f8de57e48af1d90e772 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 21 Aug 2025 09:08:07 +0000 Subject: [PATCH 19/19] test: `DataFrame.join_asof` coverage https://github.com/narwhals-dev/narwhals/pull/2944#discussion_r2288927287 --- tests/frame/join_test.py | 66 ++++++++++++++++++---------------------- 1 file changed, 29 
insertions(+), 37 deletions(-) diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index 86d0aae4b8..5b22c76df5 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -722,45 +722,37 @@ def test_joinasof_keys_exceptions(constructor: Constructor) -> None: df.join_asof(df, right_on="antananarivo", on="antananarivo") -def test_joinasof_by_exceptions(constructor: Constructor) -> None: - data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} - df = from_native_lazy(constructor(data)) - with pytest.raises( - ValueError, match="If `by` is specified, `by_left` and `by_right` should be None." - ): - df.join_asof(df, on="antananarivo", by_left="bob", by_right="bob", by="bob") - - with pytest.raises( - ValueError, - match="Can not specify only `by_left` or `by_right`, you need to specify both.", - ): - df.join_asof(df, on="antananarivo", by_left="bob") +ON = "antananarivo" +BY = "bob" - with pytest.raises( - ValueError, - match="Can not specify only `by_left` or `by_right`, you need to specify both.", - ): - df.join_asof(df, on="antananarivo", by_right="bob") - - with pytest.raises( - ValueError, match="If `by` is specified, `by_left` and `by_right` should be None." - ): - df.join_asof(df, on="antananarivo", by_left="bob", by="bob") - with pytest.raises( - ValueError, match="If `by` is specified, `by_left` and `by_right` should be None." - ): - df.join_asof(df, on="antananarivo", by_right="bob", by="bob") - - with pytest.raises( - ValueError, match="`by_left` and `by_right` must have the same length." 
- ): - df.join_asof( - df, - on="antananarivo", - by_left=["antananarivo", "bob"], - by_right=["antananarivo"], - ) +@pytest.mark.parametrize( + ("on", "by_left", "by_right", "by", "message"), + [ + (ON, BY, BY, BY, r"If.+by.+by_left.+by_right.+should be None"), + (ON, BY, None, None, r"not.+by_left.+or.+by_right.+need.+both"), + (ON, None, BY, None, r"not.+by_left.+or.+by_right.+need.+both"), + (ON, BY, None, BY, r"If.+by.+by_left.+by_right.+should be None"), + (ON, None, BY, BY, r"If.+by.+by_left.+by_right.+should be None"), + (ON, [ON, BY], [ON], None, r"by_left.+by_right.+same.+length"), + ], +) +def test_joinasof_by_exceptions( + constructor: Constructor, + on: str | None, + by_left: str | list[str] | None, + by_right: str | list[str] | None, + by: str | list[str] | None, + message: str, +) -> None: + data = {ON: [1, 3, 2], BY: [4, 4, 6], "zor ro": [7.0, 8.0, 9.0]} + df = nw.from_native(constructor(data)) + if isinstance(df, nw.LazyFrame): + with pytest.raises(ValueError, match=message): + df.join_asof(df, on=on, by_left=by_left, by_right=by_right, by=by) + else: + with pytest.raises(ValueError, match=message): + df.join_asof(df, on=on, by_left=by_left, by_right=by_right, by=by) def test_join_duplicate_column_names(