From 2c36eea9c9a816c62c80cb6d52aee8fd75ac3767 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 20 Aug 2025 13:49:53 +0000 Subject: [PATCH 01/40] feat(typing): Make `Implementation` less opaque - Based on (https://github.com/python/mypy/issues/9937) - Issue raised privately by @FBruzzesi --- narwhals/dataframe.py | 84 +++++++++++++++++++------- tests/expr_and_series/is_close_test.py | 11 +--- 2 files changed, 63 insertions(+), 32 deletions(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 19def52e03..4d558b04de 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -71,8 +71,21 @@ from narwhals._compliant import CompliantDataFrame, CompliantLazyFrame from narwhals._compliant.typing import CompliantExprAny, EagerNamespaceAny + from narwhals._namespace import _NativePandasLikeDataFrame from narwhals._translate import IntoArrowTable - from narwhals._typing import Dask, DuckDB, EagerAllowed, Ibis, IntoBackend, Polars + from narwhals._typing import ( + Dask, + DuckDB, + EagerAllowed, + Ibis, + IntoBackend, + Polars, + _ArrowImpl, + _EagerAllowedImpl, + _PandasImpl, + _PandasLikeImpl, + _PolarsImpl, + ) from narwhals.group_by import GroupBy, LazyGroupBy from narwhals.typing import ( AsofJoinStrategy, @@ -407,6 +420,33 @@ def explode(self, columns: str | Sequence[str], *more_columns: str) -> Self: return self._with_compliant(self._compliant_frame.explode(columns=to_explode)) +class _ImplDescriptor: + @overload + def __get__(self, instance: DataFrame[pl.DataFrame], owner: Any) -> _PolarsImpl: ... + @overload + def __get__(self, instance: DataFrame[pd.DataFrame], owner: Any) -> _PandasImpl: ... + @overload + def __get__( + self, instance: DataFrame[_NativePandasLikeDataFrame], owner: Any + ) -> _PandasLikeImpl: ... + @overload + def __get__(self, instance: DataFrame[pa.Table], owner: Any) -> _ArrowImpl: ... + @overload + def __get__( + self, instance: DataFrame[pl.DataFrame | pd.DataFrame | pa.Table], owner: Any + ) -> _PolarsImpl | _PandasImpl | _ArrowImpl: ... + @overload + def __get__( + self, instance: DataFrame[IntoDataFrame], owner: Any + ) -> _EagerAllowedImpl: ... + @overload + def __get__(self, instance: None, owner: Any) -> Self: ... + def __get__(self, instance: Any | None, owner: Any) -> Any: + if instance is None: + return self + return instance._compliant_frame._implementation + + class DataFrame(BaseFrame[DataFrameT]): """Narwhals DataFrame, backed by a native eager dataframe. @@ -660,28 +700,26 @@ def from_numpy( ) raise ValueError(msg) - @property - def implementation(self) -> Implementation: - """Return implementation of native frame. - - This can be useful when you need to use special-casing for features outside of - Narwhals' scope - for example, when dealing with pandas' Period Dtype. - - Examples: - >>> import narwhals as nw - >>> import pandas as pd - >>> df_native = pd.DataFrame({"a": [1, 2, 3]}) - >>> df = nw.from_native(df_native) - >>> df.implementation - - >>> df.implementation.is_pandas() - True - >>> df.implementation.is_pandas_like() - True - >>> df.implementation.is_polars() - False - """ - return self._compliant_frame._implementation + implementation = _ImplDescriptor() + """Return implementation of native frame. + + This can be useful when you need to use special-casing for features outside of + Narwhals' scope - for example, when dealing with pandas' Period Dtype. + + Examples: + >>> import narwhals as nw + >>> import pandas as pd + >>> df_native = pd.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation + + >>> df.implementation.is_pandas() + True + >>> df.implementation.is_pandas_like() + True + >>> df.implementation.is_polars() + False + """ def __len__(self) -> int: return self._compliant_frame.__len__() diff --git a/tests/expr_and_series/is_close_test.py b/tests/expr_and_series/is_close_test.py index a40ee0312e..c5bb4df551 100644 --- a/tests/expr_and_series/is_close_test.py +++ b/tests/expr_and_series/is_close_test.py @@ -11,7 +11,6 @@ import pytest import narwhals as nw -from narwhals._utils import is_eager_allowed from narwhals.exceptions import ComputeError, InvalidOperationError from tests.conftest import ( dask_lazy_p1_constructor, @@ -114,11 +113,8 @@ def test_is_close_series_with_series( ) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) x, y = df["x"], df["y"] - backend = df.implementation - assert is_eager_allowed(backend) - nulls = nw.new_series( - name="nulls", values=[None] * len(x), dtype=nw.Float64(), backend=backend + "nulls", [None] * len(x), nw.Float64(), backend=df.implementation ) x = x.zip_with(x != NAN_PLACEHOLDER, x**0.5).zip_with(x != NULL_PLACEHOLDER, nulls) y = y.zip_with(y != NAN_PLACEHOLDER, y**0.5).zip_with(y != NULL_PLACEHOLDER, nulls) @@ -141,11 +137,8 @@ def test_is_close_series_with_scalar( ) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) y = df["y"] - backend = df.implementation - assert is_eager_allowed(backend) - nulls = nw.new_series( - name="nulls", values=[None] * len(y), dtype=nw.Float64(), backend=backend + "nulls", [None] * len(y), nw.Float64(), backend=df.implementation ) y = y.zip_with(y != NAN_PLACEHOLDER, y**0.5).zip_with(y != NULL_PLACEHOLDER, nulls) result = y.is_close(other, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal) From 2350dfc62a0a187d53bc3fa33c40aaa58dcbd226 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 20 Aug 2025 14:10:02 +0000 Subject: [PATCH 02/40] ci(typing): fix pyright coverage --- narwhals/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 4d558b04de..274539d4d5 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -700,7 +700,7 @@ def from_numpy( ) raise ValueError(msg) - implementation = _ImplDescriptor() + implementation: _ImplDescriptor = _ImplDescriptor() """Return implementation of native frame. This can be useful when you need to use special-casing for features outside of From fe80d520e84e5d64b5fd4a0d0b4c4845f5ce51fb Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 20 Aug 2025 14:39:04 +0000 Subject: [PATCH 03/40] ci: Handle descriptors in API reference https://results.pre-commit.ci/run/github/760058710/1755699011._wD_fbikQnqzClVkSVIiJw --- narwhals/dataframe.py | 3 +++ utils/check_api_reference.py | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 274539d4d5..a554c739d8 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -421,6 +421,9 @@ def explode(self, columns: str | Sequence[str], *more_columns: str) -> Self: class _ImplDescriptor: + def __set_name__(self, owner: type[Any], name: str) -> None: + self.__name__: str = name + @overload def __get__(self, instance: DataFrame[pl.DataFrame], owner: Any) -> _PolarsImpl: ... @overload diff --git a/utils/check_api_reference.py b/utils/check_api_reference.py index efabd53f8f..6264fc9875 100644 --- a/utils/check_api_reference.py +++ b/utils/check_api_reference.py @@ -6,7 +6,7 @@ # ruff: noqa: N806 from collections import deque -from inspect import isfunction +from inspect import isfunction, ismethoddescriptor from pathlib import Path from types import MethodType, ModuleType from typing import TYPE_CHECKING, Any @@ -24,13 +24,14 @@ def _is_public_method_or_property(obj: Any) -> bool: return ( - isfunction(obj) or isinstance(obj, (MethodType, property)) + isfunction(obj) + or (isinstance(obj, (MethodType, property)) or ismethoddescriptor(obj)) ) and obj.__name__.startswith(LOWERCASE) else: def _is_public_method_or_property(obj: Any) -> bool: return ( - (isfunction(obj) or isinstance(obj, MethodType)) + (isfunction(obj) or (isinstance(obj, MethodType) or ismethoddescriptor(obj))) and obj.__name__.startswith(LOWERCASE) ) or (isinstance(obj, property) and obj.fget.__name__.startswith(LOWERCASE)) From cadcdf0d7419302db78e9a21b239f739d29cd36c Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 21 Aug 2025 09:20:59 +0000 Subject: [PATCH 04/40] cov https://github.com/narwhals-dev/narwhals/actions/runs/17105179067/job/48512107390?pr=3016 --- narwhals/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 20112446c7..ff83ce503f 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -446,7 +446,7 @@ def __get__( @overload def __get__(self, instance: None, owner: Any) -> Self: ... def __get__(self, instance: Any | None, owner: Any) -> Any: - if instance is None: + if instance is None: # pragma: no cover return self return instance._compliant_frame._implementation From 5b2bc62b7f06bd2a38088eb36b8e17cf72edb8b8 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 21 Aug 2025 10:44:33 +0000 Subject: [PATCH 05/40] add typing tests, tweak overloads --- narwhals/dataframe.py | 19 ++++++++--- tests/implementation_test.py | 63 ++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 5 deletions(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index ff83ce503f..0a682405db 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -71,7 +71,11 @@ from narwhals._compliant import CompliantDataFrame, CompliantLazyFrame from narwhals._compliant.typing import CompliantExprAny, EagerNamespaceAny - from narwhals._namespace import _NativePandasLikeDataFrame + from narwhals._namespace import ( + _CuDFDataFrame, + _ModinDataFrame, + _NativePandasLikeDataFrame, + ) from narwhals._translate import IntoArrowTable from narwhals._typing import ( Dask, @@ -81,7 +85,9 @@ IntoBackend, Polars, _ArrowImpl, + _CudfImpl, _EagerAllowedImpl, + _ModinImpl, _PandasImpl, _PandasLikeImpl, _PolarsImpl, @@ -430,6 +436,11 @@ def __get__(self, instance: DataFrame[pl.DataFrame], owner: Any) -> _PolarsImpl: @overload def __get__(self, instance: DataFrame[pd.DataFrame], owner: Any) -> _PandasImpl: ... @overload + def __get__(self, instance: DataFrame[_ModinDataFrame], owner: Any) -> _ModinImpl: ... + + @overload # oof, looks like these two need their names aligned 😅 + def __get__(self, instance: DataFrame[_CuDFDataFrame], owner: Any) -> _CudfImpl: ... + @overload def __get__( self, instance: DataFrame[_NativePandasLikeDataFrame], owner: Any ) -> _PandasLikeImpl: ... @@ -440,11 +451,9 @@ def __get__( self, instance: DataFrame[pl.DataFrame | pd.DataFrame | pa.Table], owner: Any ) -> _PolarsImpl | _PandasImpl | _ArrowImpl: ... @overload - def __get__( - self, instance: DataFrame[IntoDataFrame], owner: Any - ) -> _EagerAllowedImpl: ... - @overload def __get__(self, instance: None, owner: Any) -> Self: ... + @overload + def __get__(self, instance: DataFrame[Any], owner: Any) -> _EagerAllowedImpl: ... def __get__(self, instance: Any | None, owner: Any) -> Any: if instance is None: # pragma: no cover return self diff --git a/tests/implementation_test.py b/tests/implementation_test.py index c6f7ccffc8..6c745e5962 100644 --- a/tests/implementation_test.py +++ b/tests/implementation_test.py @@ -1,9 +1,21 @@ from __future__ import annotations +from typing import TYPE_CHECKING, Any + import pytest import narwhals as nw +if TYPE_CHECKING: + from narwhals._typing import ( + _ArrowImpl, + _EagerAllowedImpl, + _ModinImpl, + _PandasImpl, + _PolarsImpl, + ) + from narwhals.typing import IntoDataFrame + def test_implementation_pandas() -> None: pytest.importorskip("pandas") @@ -52,3 +64,54 @@ def test_implementation_polars() -> None: ) def test_implementation_new(member: str, value: str) -> None: assert nw.Implementation(value) is getattr(nw.Implementation, member) + + +if TYPE_CHECKING: + + def test_implementation_typing() -> None: # noqa: PLR0914 + import modin.pandas as mpd + import pandas as pd + import polars as pl + import pyarrow as pa + from typing_extensions import assert_type + + data: dict[str, Any] = {"a": [1, 2, 3]} + polars_df = nw.from_native(pl.DataFrame(data)) + pandas_df = nw.from_native(pd.DataFrame(data)) + arrow_df = nw.from_native(pa.table(data)) + + polars_impl = polars_df.implementation + pandas_impl = pandas_df.implementation + arrow_impl = arrow_df.implementation + + assert_type(polars_impl, _PolarsImpl) + assert_type(pandas_impl, _PandasImpl) + assert_type(arrow_impl, _ArrowImpl) + + modin_native = mpd.DataFrame.from_dict(data) + modin_df = nw.from_native(modin_native) + modin_impl = modin_df.implementation + # TODO @dangotbanned: Is this even possible? + # - `mypy` won't ever work, treats as `Any` + # - `pyright` can resolve `modin_df: narwhals.dataframe.DataFrame[modin.pandas.dataframe.DataFrame]` + # - But we run into variance issues if trying to widen the concrete type again + assert_type(modin_impl, _ModinImpl) # type: ignore[assert-type] + + can_lazyframe_collect_dfs: list[ + nw.DataFrame[pl.DataFrame] + | nw.DataFrame[pd.DataFrame] + | nw.DataFrame[pa.Table] + ] = [polars_df, pandas_df, arrow_df] + can_lazyframe_collect_impl = can_lazyframe_collect_dfs[0].implementation + assert_type(can_lazyframe_collect_impl, _PolarsImpl | _PandasImpl | _ArrowImpl) + + very_lost_df = nw.DataFrame.__new__(nw.DataFrame) + very_lost_impl = very_lost_df.implementation + # TODO @dangotbanned: Is this so bad? + # - Currently `DataFrame[Any]` matches the first overload (`_PolarsImpl`) + # - That is accepted **everywhere** that uses `IntoBackend` + assert_type(very_lost_impl, _EagerAllowedImpl) # type: ignore[assert-type] + + not_so_lost_df = nw.DataFrame.__new__(nw.DataFrame[IntoDataFrame]) + not_so_lost_impl = not_so_lost_df.implementation + assert_type(not_so_lost_impl, _EagerAllowedImpl) From 14974bc8375063c637c687e244ec53f9cc9749ab Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 21 Aug 2025 11:28:01 +0000 Subject: [PATCH 06/40] refactor(typing): Switch most overloads to `BaseFrame` Towards solving https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2288442696 --- narwhals/dataframe.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 0a682405db..01068dd39c 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -432,23 +432,23 @@ def __set_name__(self, owner: type[Any], name: str) -> None: self.__name__: str = name @overload - def __get__(self, instance: DataFrame[pl.DataFrame], owner: Any) -> _PolarsImpl: ... + def __get__(self, instance: BaseFrame[pl.DataFrame], owner: Any) -> _PolarsImpl: ... @overload - def __get__(self, instance: DataFrame[pd.DataFrame], owner: Any) -> _PandasImpl: ... + def __get__(self, instance: BaseFrame[pd.DataFrame], owner: Any) -> _PandasImpl: ... @overload - def __get__(self, instance: DataFrame[_ModinDataFrame], owner: Any) -> _ModinImpl: ... + def __get__(self, instance: BaseFrame[_ModinDataFrame], owner: Any) -> _ModinImpl: ... @overload # oof, looks like these two need their names aligned 😅 - def __get__(self, instance: DataFrame[_CuDFDataFrame], owner: Any) -> _CudfImpl: ... + def __get__(self, instance: BaseFrame[_CuDFDataFrame], owner: Any) -> _CudfImpl: ... @overload def __get__( - self, instance: DataFrame[_NativePandasLikeDataFrame], owner: Any + self, instance: BaseFrame[_NativePandasLikeDataFrame], owner: Any ) -> _PandasLikeImpl: ... @overload - def __get__(self, instance: DataFrame[pa.Table], owner: Any) -> _ArrowImpl: ... + def __get__(self, instance: BaseFrame[pa.Table], owner: Any) -> _ArrowImpl: ... @overload def __get__( - self, instance: DataFrame[pl.DataFrame | pd.DataFrame | pa.Table], owner: Any + self, instance: BaseFrame[pl.DataFrame | pd.DataFrame | pa.Table], owner: Any ) -> _PolarsImpl | _PandasImpl | _ArrowImpl: ... @overload def __get__(self, instance: None, owner: Any) -> Self: ... From 685409c022be5e1484c70606073d0c890015b0c4 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 21 Aug 2025 12:10:33 +0000 Subject: [PATCH 07/40] feat(typing): Get basic `LazyFrame.implementation` working the `pyspark` parts are going to be needed next --- narwhals/_namespace.py | 20 ++++-- narwhals/dataframe.py | 129 ++++++++++++++++------------------- tests/implementation_test.py | 5 ++ tests/namespace_test.py | 2 +- 4 files changed, 79 insertions(+), 77 deletions(-) diff --git a/narwhals/_namespace.py b/narwhals/_namespace.py index efe2a2bcdf..c605f0665e 100644 --- a/narwhals/_namespace.py +++ b/narwhals/_namespace.py @@ -10,6 +10,7 @@ Literal, Protocol, TypeVar, + cast, overload, ) @@ -37,8 +38,6 @@ import pandas as pd import polars as pl import pyarrow as pa - import pyspark.sql as pyspark_sql - from pyspark.sql.connect.dataframe import DataFrame as PySparkConnectDataFrame from typing_extensions import Self, TypeAlias, TypeIs from narwhals._arrow.namespace import ArrowNamespace @@ -112,6 +111,13 @@ class _ModinDataFrame(_BasePandasLikeFrame, Protocol): class _ModinSeries(_BasePandasLikeSeries, Protocol): _pandas_class: type[pd.Series[Any]] + # NOTE: Using `pyspark.sql.DataFrame` creates false positives in overloads when not installed + class _PySparkDataFrame(NativeLazyFrame, Protocol): + # Not on sqlframe classes + # Insane method name that no other framework would clobber + # https://github.com/apache/spark/blob/8530444e25b83971da4314c608aa7d763adeceb3/python/pyspark/sql/dataframe.py#L4875 + def dropDuplicatesWithinWatermark(self, *arg: Any, **kwargs: Any) -> Any: ... # noqa: N802 + _NativePolars: TypeAlias = "pl.DataFrame | pl.LazyFrame | pl.Series" _NativeArrow: TypeAlias = "pa.Table | pa.ChunkedArray[Any]" _NativeDuckDB: TypeAlias = "duckdb.DuckDBPyRelation" @@ -124,8 +130,8 @@ class _ModinSeries(_BasePandasLikeSeries, Protocol): ) _NativePandasLike: TypeAlias = "_NativePandasLikeDataFrame |_NativePandasLikeSeries" _NativeSQLFrame: TypeAlias = "SQLFrameDataFrame" - _NativePySpark: TypeAlias = "pyspark_sql.DataFrame" - _NativePySparkConnect: TypeAlias = "PySparkConnectDataFrame" + _NativePySpark: TypeAlias = _PySparkDataFrame + _NativePySparkConnect: TypeAlias = _PySparkDataFrame _NativeSparkLike: TypeAlias = ( "_NativeSQLFrame | _NativePySpark | _NativePySparkConnect" ) @@ -371,8 +377,10 @@ def is_native_dask(obj: Any) -> TypeIs[_NativeDask]: is_native_duckdb: _Guard[_NativeDuckDB] = is_duckdb_relation is_native_sqlframe: _Guard[_NativeSQLFrame] = is_sqlframe_dataframe -is_native_pyspark: _Guard[_NativePySpark] = is_pyspark_dataframe -is_native_pyspark_connect: _Guard[_NativePySparkConnect] = is_pyspark_connect_dataframe +is_native_pyspark = cast("_Guard[_NativePySpark]", is_pyspark_dataframe) +is_native_pyspark_connect = cast( + "_Guard[_NativePySparkConnect]", is_pyspark_connect_dataframe +) def is_native_pandas(obj: Any) -> TypeIs[_NativePandas]: diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 01068dd39c..aa11adf719 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -87,6 +87,7 @@ _ArrowImpl, _CudfImpl, _EagerAllowedImpl, + _LazyAllowedImpl, _ModinImpl, _PandasImpl, _PandasLikeImpl, @@ -123,10 +124,68 @@ MultiIndexSelector: TypeAlias = "_MultiIndexSelector[Series[Any]]" +class _ImplDescriptor: + def __set_name__(self, owner: type[Any], name: str) -> None: + self.__name__: str = name + + @overload + def __get__( + self, instance: DataFrame[pl.DataFrame] | LazyFrame[pl.LazyFrame], owner: Any + ) -> _PolarsImpl: ... + @overload + def __get__(self, instance: BaseFrame[pd.DataFrame], owner: Any) -> _PandasImpl: ... + @overload + def __get__(self, instance: BaseFrame[_ModinDataFrame], owner: Any) -> _ModinImpl: ... + + @overload # oof, looks like these two need their names aligned 😅 + def __get__(self, instance: BaseFrame[_CuDFDataFrame], owner: Any) -> _CudfImpl: ... + @overload + def __get__( + self, instance: BaseFrame[_NativePandasLikeDataFrame], owner: Any + ) -> _PandasLikeImpl: ... + @overload + def __get__(self, instance: BaseFrame[pa.Table], owner: Any) -> _ArrowImpl: ... + @overload + def __get__( + self, instance: BaseFrame[pl.DataFrame | pd.DataFrame | pa.Table], owner: Any + ) -> _PolarsImpl | _PandasImpl | _ArrowImpl: ... + @overload + def __get__(self, instance: None, owner: Any) -> Self: ... + @overload + def __get__(self, instance: DataFrame[Any], owner: Any) -> _EagerAllowedImpl: ... + @overload + def __get__(self, instance: LazyFrame[Any], owner: Any) -> _LazyAllowedImpl: ... + def __get__(self, instance: Any | None, owner: Any) -> Any: + if instance is None: # pragma: no cover + return self + return instance._compliant_frame._implementation + + class BaseFrame(Generic[_FrameT]): _compliant_frame: Any _level: Literal["full", "lazy", "interchange"] + implementation: _ImplDescriptor = _ImplDescriptor() + """Return implementation of native frame. + + This can be useful when you need to use special-casing for features outside of + Narwhals' scope - for example, when dealing with pandas' Period Dtype. + + Examples: + >>> import narwhals as nw + >>> import pandas as pd + >>> df_native = pd.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation + + >>> df.implementation.is_pandas() + True + >>> df.implementation.is_pandas_like() + True + >>> df.implementation.is_polars() + False + """ + def __native_namespace__(self) -> ModuleType: return self._compliant_frame.__native_namespace__() # type: ignore[no-any-return] @@ -427,39 +486,6 @@ def explode(self, columns: str | Sequence[str], *more_columns: str) -> Self: return self._with_compliant(self._compliant_frame.explode(columns=to_explode)) -class _ImplDescriptor: - def __set_name__(self, owner: type[Any], name: str) -> None: - self.__name__: str = name - - @overload - def __get__(self, instance: BaseFrame[pl.DataFrame], owner: Any) -> _PolarsImpl: ... - @overload - def __get__(self, instance: BaseFrame[pd.DataFrame], owner: Any) -> _PandasImpl: ... - @overload - def __get__(self, instance: BaseFrame[_ModinDataFrame], owner: Any) -> _ModinImpl: ... - - @overload # oof, looks like these two need their names aligned 😅 - def __get__(self, instance: BaseFrame[_CuDFDataFrame], owner: Any) -> _CudfImpl: ... - @overload - def __get__( - self, instance: BaseFrame[_NativePandasLikeDataFrame], owner: Any - ) -> _PandasLikeImpl: ... - @overload - def __get__(self, instance: BaseFrame[pa.Table], owner: Any) -> _ArrowImpl: ... - @overload - def __get__( - self, instance: BaseFrame[pl.DataFrame | pd.DataFrame | pa.Table], owner: Any - ) -> _PolarsImpl | _PandasImpl | _ArrowImpl: ... - @overload - def __get__(self, instance: None, owner: Any) -> Self: ... - @overload - def __get__(self, instance: DataFrame[Any], owner: Any) -> _EagerAllowedImpl: ... - def __get__(self, instance: Any | None, owner: Any) -> Any: - if instance is None: # pragma: no cover - return self - return instance._compliant_frame._implementation - - class DataFrame(BaseFrame[DataFrameT]): """Narwhals DataFrame, backed by a native eager dataframe. @@ -712,27 +738,6 @@ def from_numpy( ) raise ValueError(msg) - implementation: _ImplDescriptor = _ImplDescriptor() - """Return implementation of native frame. - - This can be useful when you need to use special-casing for features outside of - Narwhals' scope - for example, when dealing with pandas' Period Dtype. - - Examples: - >>> import narwhals as nw - >>> import pandas as pd - >>> df_native = pd.DataFrame({"a": [1, 2, 3]}) - >>> df = nw.from_native(df_native) - >>> df.implementation - - >>> df.implementation.is_pandas() - True - >>> df.implementation.is_pandas_like() - True - >>> df.implementation.is_polars() - False - """ - def __len__(self) -> int: return self._compliant_frame.__len__() @@ -2345,22 +2350,6 @@ def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> def __repr__(self) -> str: # pragma: no cover return generate_repr("Narwhals LazyFrame", self.to_native().__repr__()) - @property - def implementation(self) -> Implementation: - """Return implementation of native frame. - - This can be useful when you need to use special-casing for features outside of - Narwhals' scope - for example, when dealing with pandas' Period Dtype. - - Examples: - >>> import narwhals as nw - >>> import dask.dataframe as dd - >>> lf_native = dd.from_dict({"a": [1, 2]}, npartitions=1) - >>> nw.from_native(lf_native).implementation - - """ - return self._compliant_frame._implementation - def __getitem__(self, item: str | slice) -> NoReturn: msg = "Slicing is not supported on LazyFrame" raise TypeError(msg) diff --git a/tests/implementation_test.py b/tests/implementation_test.py index 6c745e5962..46205cfd66 100644 --- a/tests/implementation_test.py +++ b/tests/implementation_test.py @@ -77,14 +77,19 @@ def test_implementation_typing() -> None: # noqa: PLR0914 data: dict[str, Any] = {"a": [1, 2, 3]} polars_df = nw.from_native(pl.DataFrame(data)) + polars_ldf = nw.from_native(pl.LazyFrame(data)) pandas_df = nw.from_native(pd.DataFrame(data)) arrow_df = nw.from_native(pa.table(data)) polars_impl = polars_df.implementation + lazy_polars_impl = polars_ldf.implementation pandas_impl = pandas_df.implementation arrow_impl = arrow_df.implementation assert_type(polars_impl, _PolarsImpl) + assert_type(lazy_polars_impl, _PolarsImpl) + # NOTE: Testing the lazy versions of pandas/pyarrow would require adding overloads to `DataFrame.lazy` + # Currently, everything becomes `LazyFrame[Any]` assert_type(pandas_impl, _PandasImpl) assert_type(arrow_impl, _ArrowImpl) diff --git a/tests/namespace_test.py b/tests/namespace_test.py index b1eae1df66..1445c0aad6 100644 --- a/tests/namespace_test.py +++ b/tests/namespace_test.py @@ -81,7 +81,7 @@ def test_namespace_from_native_object(constructor: Constructor) -> None: def test_namespace_from_native_object_invalid() -> None: data = {"a": [1, 2, 3], "b": [4, 5, 6]} with pytest.raises(TypeError, match=r"dict"): - Namespace.from_native_object(data) # pyright: ignore[reportCallIssue, reportArgumentType] + Namespace.from_native_object(data) # type: ignore[call-overload] @eager_allowed From 0f83e447b4d41076ad79daed5c0f4c25d65ccae9 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 21 Aug 2025 12:49:47 +0000 Subject: [PATCH 08/40] kinda support dask --- narwhals/_namespace.py | 2 +- narwhals/dataframe.py | 4 ++++ tests/implementation_test.py | 22 +++++++++++++++++++--- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/narwhals/_namespace.py b/narwhals/_namespace.py index c605f0665e..14a52511e6 100644 --- a/narwhals/_namespace.py +++ b/narwhals/_namespace.py @@ -90,7 +90,7 @@ class _BasePandasLikeFrame(NativeDataFrame, _BasePandasLike, Protocol): ... class _BasePandasLikeSeries(NativeSeries, _BasePandasLike, Protocol): def where(self, cond: Any, other: Any = ..., **kwds: Any) -> Any: ... - class _NativeDask(Protocol): + class _NativeDask(NativeLazyFrame, Protocol): _partition_type: type[pd.DataFrame] class _CuDFDataFrame(_BasePandasLikeFrame, Protocol): diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index aa11adf719..83d22a523a 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -74,6 +74,7 @@ from narwhals._namespace import ( _CuDFDataFrame, _ModinDataFrame, + _NativeDask, _NativePandasLikeDataFrame, ) from narwhals._translate import IntoArrowTable @@ -86,6 +87,7 @@ Polars, _ArrowImpl, _CudfImpl, + _DaskImpl, _EagerAllowedImpl, _LazyAllowedImpl, _ModinImpl, @@ -150,6 +152,8 @@ def __get__( self, instance: BaseFrame[pl.DataFrame | pd.DataFrame | pa.Table], owner: Any ) -> _PolarsImpl | _PandasImpl | _ArrowImpl: ... @overload + def __get__(self, instance: LazyFrame[_NativeDask], owner: Any) -> _DaskImpl: ... + @overload def __get__(self, instance: None, owner: Any) -> Self: ... @overload def __get__(self, instance: DataFrame[Any], owner: Any) -> _EagerAllowedImpl: ... diff --git a/tests/implementation_test.py b/tests/implementation_test.py index 46205cfd66..764551743f 100644 --- a/tests/implementation_test.py +++ b/tests/implementation_test.py @@ -1,6 +1,8 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +# Using pyright's assert type instead +# mypy: disable-error-code="assert-type" +from typing import TYPE_CHECKING, Any, cast import pytest @@ -9,7 +11,9 @@ if TYPE_CHECKING: from narwhals._typing import ( _ArrowImpl, + _DaskImpl, _EagerAllowedImpl, + _LazyAllowedImpl, _ModinImpl, _PandasImpl, _PolarsImpl, @@ -69,6 +73,7 @@ def test_implementation_new(member: str, value: str) -> None: if TYPE_CHECKING: def test_implementation_typing() -> None: # noqa: PLR0914 + import dask.dataframe as dd import modin.pandas as mpd import pandas as pd import polars as pl @@ -100,7 +105,18 @@ def test_implementation_typing() -> None: # noqa: PLR0914 # - `mypy` won't ever work, treats as `Any` # - `pyright` can resolve `modin_df: narwhals.dataframe.DataFrame[modin.pandas.dataframe.DataFrame]` # - But we run into variance issues if trying to widen the concrete type again - assert_type(modin_impl, _ModinImpl) # type: ignore[assert-type] + assert_type(modin_impl, _ModinImpl) # pyright: ignore[reportAssertTypeFailure] + # If ^^^ can be fixed, the next one should be removed + assert_type(modin_impl, _EagerAllowedImpl) + + # NOTE: Constructor returns `Unknown` + dask_native = cast("dd.DataFrame", dd.DataFrame.from_dict(data)) + dask_ldf = nw.from_native(dask_native) + dask_impl = dask_ldf.implementation + # NOTE: Same issue as modin + assert_type(dask_impl, _DaskImpl) # pyright: ignore[reportAssertTypeFailure] + # If ^^^ can be fixed, the next one should be removed + assert_type(dask_impl, _LazyAllowedImpl) can_lazyframe_collect_dfs: list[ nw.DataFrame[pl.DataFrame] @@ -115,7 +131,7 @@ def test_implementation_typing() -> None: # noqa: PLR0914 # TODO @dangotbanned: Is this so bad? # - Currently `DataFrame[Any]` matches the first overload (`_PolarsImpl`) # - That is accepted **everywhere** that uses `IntoBackend` - assert_type(very_lost_impl, _EagerAllowedImpl) # type: ignore[assert-type] + assert_type(very_lost_impl, _EagerAllowedImpl) # pyright: ignore[reportAssertTypeFailure] not_so_lost_df = nw.DataFrame.__new__(nw.DataFrame[IntoDataFrame]) not_so_lost_impl = not_so_lost_df.implementation From 49a10bd09762ca7d27676fa37db0c0b6e1cdbd07 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 21 Aug 2025 12:53:44 +0000 Subject: [PATCH 09/40] ci: try include `dask` in typing? https://github.com/narwhals-dev/narwhals/actions/runs/17127403666/job/48582334595?pr=3016 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 62511b0e26..a231c7ee70 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,7 +74,7 @@ typing = [ # keep some of these pinned and bump periodically so there's fewer s "sqlframe", "polars==1.32.2", "uv", - "narwhals[ibis]", + "narwhals[ibis,dask]", ] docs = [ "black", # required by mkdocstrings_handlers From fd2b93e7203f5aa47d7e8f3ca2e75b8fb740afe7 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 21 Aug 2025 12:56:07 +0000 Subject: [PATCH 10/40] aaaaand `modin` as well https://github.com/narwhals-dev/narwhals/actions/runs/17127503198/job/48582656870?pr=3016 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a231c7ee70..ec9da9a99f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,7 +74,7 @@ typing = [ # keep some of these pinned and bump periodically so there's fewer s "sqlframe", "polars==1.32.2", "uv", - "narwhals[ibis,dask]", + "narwhals[ibis,dask,modin]", ] docs = [ "black", # required by mkdocstrings_handlers From 618ce8c1f2eb713e907ca2e7451e0a1eb63665d5 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 21 Aug 2025 17:00:45 +0000 Subject: [PATCH 11/40] feat(typing): `duckdb` & `sqlframe` work! --- narwhals/dataframe.py | 10 ++++++++++ tests/implementation_test.py | 13 +++++++++++++ 2 files changed, 23 insertions(+) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 83d22a523a..e530391ea9 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -75,7 +75,9 @@ _CuDFDataFrame, _ModinDataFrame, _NativeDask, + _NativeDuckDB, _NativePandasLikeDataFrame, + _NativeSQLFrame, ) from narwhals._translate import IntoArrowTable from narwhals._typing import ( @@ -88,12 +90,14 @@ _ArrowImpl, _CudfImpl, _DaskImpl, + _DuckDBImpl, _EagerAllowedImpl, _LazyAllowedImpl, _ModinImpl, _PandasImpl, _PandasLikeImpl, _PolarsImpl, + _SQLFrameImpl, ) from narwhals.group_by import GroupBy, LazyGroupBy from narwhals.typing import ( @@ -152,6 +156,12 @@ def __get__( self, instance: BaseFrame[pl.DataFrame | pd.DataFrame | pa.Table], owner: Any ) -> _PolarsImpl | _PandasImpl | _ArrowImpl: ... @overload + def __get__(self, instance: LazyFrame[_NativeDuckDB], owner: Any) -> _DuckDBImpl: ... + @overload + def __get__( + self, instance: LazyFrame[_NativeSQLFrame], owner: Any + ) -> _SQLFrameImpl: ... + @overload def __get__(self, instance: LazyFrame[_NativeDask], owner: Any) -> _DaskImpl: ... @overload def __get__(self, instance: None, owner: Any) -> Self: ... diff --git a/tests/implementation_test.py b/tests/implementation_test.py index 764551743f..59d0180222 100644 --- a/tests/implementation_test.py +++ b/tests/implementation_test.py @@ -12,11 +12,13 @@ from narwhals._typing import ( _ArrowImpl, _DaskImpl, + _DuckDBImpl, _EagerAllowedImpl, _LazyAllowedImpl, _ModinImpl, _PandasImpl, _PolarsImpl, + _SQLFrameImpl, ) from narwhals.typing import IntoDataFrame @@ -80,16 +82,25 @@ def test_implementation_typing() -> None: # noqa: PLR0914 import pyarrow as pa from typing_extensions import assert_type + from tests.conftest import ( + duckdb_lazy_constructor, + sqlframe_pyspark_lazy_constructor, + ) + data: dict[str, Any] = {"a": [1, 2, 3]} polars_df = nw.from_native(pl.DataFrame(data)) polars_ldf = nw.from_native(pl.LazyFrame(data)) pandas_df = nw.from_native(pd.DataFrame(data)) arrow_df = nw.from_native(pa.table(data)) + duckdb_ldf = nw.from_native(duckdb_lazy_constructor(data)) + sqlframe_ldf = nw.from_native(sqlframe_pyspark_lazy_constructor(data)) polars_impl = polars_df.implementation lazy_polars_impl = polars_ldf.implementation pandas_impl = pandas_df.implementation arrow_impl = arrow_df.implementation + duckdb_impl = duckdb_ldf.implementation + sqlframe_impl = sqlframe_ldf.implementation assert_type(polars_impl, _PolarsImpl) assert_type(lazy_polars_impl, _PolarsImpl) @@ -97,6 +108,8 @@ def test_implementation_typing() -> None: # noqa: PLR0914 # Currently, everything becomes `LazyFrame[Any]` assert_type(pandas_impl, _PandasImpl) assert_type(arrow_impl, _ArrowImpl) + assert_type(duckdb_impl, _DuckDBImpl) + assert_type(sqlframe_impl, _SQLFrameImpl) modin_native = mpd.DataFrame.from_dict(data) modin_df = nw.from_native(modin_native) From 0606a14c86972fade1618efcc07989bb8b9631d0 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 21 Aug 2025 18:02:54 +0000 Subject: [PATCH 12/40] kinda support `ibis` --- narwhals/dataframe.py | 4 ++++ tests/implementation_test.py | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index e530391ea9..52105aa339 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -76,6 +76,7 @@ _ModinDataFrame, _NativeDask, _NativeDuckDB, + _NativeIbis, _NativePandasLikeDataFrame, _NativeSQLFrame, ) @@ -92,6 +93,7 @@ _DaskImpl, _DuckDBImpl, _EagerAllowedImpl, + _IbisImpl, _LazyAllowedImpl, _ModinImpl, _PandasImpl, @@ -164,6 +166,8 @@ def __get__( @overload def __get__(self, instance: LazyFrame[_NativeDask], owner: Any) -> _DaskImpl: ... @overload + def __get__(self, instance: LazyFrame[_NativeIbis], owner: Any) -> _IbisImpl: ... + @overload def __get__(self, instance: None, owner: Any) -> Self: ... @overload def __get__(self, instance: DataFrame[Any], owner: Any) -> _EagerAllowedImpl: ... diff --git a/tests/implementation_test.py b/tests/implementation_test.py index 59d0180222..58d2e857be 100644 --- a/tests/implementation_test.py +++ b/tests/implementation_test.py @@ -14,6 +14,7 @@ _DaskImpl, _DuckDBImpl, _EagerAllowedImpl, + _IbisImpl, _LazyAllowedImpl, _ModinImpl, _PandasImpl, @@ -84,6 +85,7 @@ def test_implementation_typing() -> None: # noqa: PLR0914 from tests.conftest import ( duckdb_lazy_constructor, + ibis_lazy_constructor, sqlframe_pyspark_lazy_constructor, ) @@ -94,6 +96,7 @@ def test_implementation_typing() -> None: # noqa: PLR0914 arrow_df = nw.from_native(pa.table(data)) duckdb_ldf = nw.from_native(duckdb_lazy_constructor(data)) sqlframe_ldf = nw.from_native(sqlframe_pyspark_lazy_constructor(data)) + ibis_ldf = nw.from_native(ibis_lazy_constructor(data)) polars_impl = polars_df.implementation lazy_polars_impl = polars_ldf.implementation @@ -101,6 +104,7 @@ def test_implementation_typing() -> None: # noqa: PLR0914 arrow_impl = arrow_df.implementation duckdb_impl = duckdb_ldf.implementation sqlframe_impl = sqlframe_ldf.implementation + ibis_impl = ibis_ldf.implementation assert_type(polars_impl, _PolarsImpl) assert_type(lazy_polars_impl, _PolarsImpl) @@ -131,6 +135,11 @@ def test_implementation_typing() -> None: # noqa: PLR0914 # If ^^^ can be fixed, the next one should be removed assert_type(dask_impl, _LazyAllowedImpl) + # NOTE: Also same issue 🤔 + # TODO @dangotbanned: try something else instead + assert_type(ibis_impl, _IbisImpl) # pyright: ignore[reportAssertTypeFailure] + assert_type(dask_impl, _LazyAllowedImpl) + can_lazyframe_collect_dfs: list[ nw.DataFrame[pl.DataFrame] | nw.DataFrame[pd.DataFrame] From 37aaa6922b28afe427a9cf1e3fff233997d9f131 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 21 Aug 2025 18:43:43 +0000 Subject: [PATCH 13/40] test(typing): Simplify Any/Into, also test lazy Related https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2291784745 --- tests/implementation_test.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tests/implementation_test.py b/tests/implementation_test.py index 58d2e857be..460a7ed228 100644 --- a/tests/implementation_test.py +++ b/tests/implementation_test.py @@ -21,7 +21,7 @@ _PolarsImpl, _SQLFrameImpl, ) - from narwhals.typing import IntoDataFrame + from narwhals.typing import IntoDataFrame, IntoLazyFrame def test_implementation_pandas() -> None: @@ -75,7 +75,7 @@ def test_implementation_new(member: str, value: str) -> None: if TYPE_CHECKING: - def test_implementation_typing() -> None: # noqa: PLR0914 + def test_implementation_typing() -> None: # noqa: PLR0914, PLR0915 import dask.dataframe as dd import modin.pandas as mpd import pandas as pd @@ -97,6 +97,10 @@ def test_implementation_typing() -> None: # noqa: PLR0914 duckdb_ldf = nw.from_native(duckdb_lazy_constructor(data)) sqlframe_ldf = nw.from_native(sqlframe_pyspark_lazy_constructor(data)) ibis_ldf = nw.from_native(ibis_lazy_constructor(data)) + any_df = cast("nw.DataFrame[Any]", "fake df 1") + any_ldf = cast("nw.LazyFrame[Any]", "fake ldf 1") + bound_df = cast("nw.DataFrame[IntoDataFrame]", "fake df 2") + bound_ldf = cast("nw.LazyFrame[IntoLazyFrame]", "fake ldf 2") polars_impl = polars_df.implementation lazy_polars_impl = polars_ldf.implementation @@ -148,13 +152,15 @@ def test_implementation_typing() -> None: # noqa: PLR0914 can_lazyframe_collect_impl = can_lazyframe_collect_dfs[0].implementation assert_type(can_lazyframe_collect_impl, _PolarsImpl | _PandasImpl | _ArrowImpl) - very_lost_df = nw.DataFrame.__new__(nw.DataFrame) - very_lost_impl = very_lost_df.implementation + any_df_impl = any_df.implementation + any_ldf_impl = any_ldf.implementation # TODO @dangotbanned: Is this so bad? - # - Currently `DataFrame[Any]` matches the first overload (`_PolarsImpl`) + # - Currently `DataFrame[Any] | LazyFrame[Any]` matches the first overload (`_PolarsImpl`) # - That is accepted **everywhere** that uses `IntoBackend` - assert_type(very_lost_impl, _EagerAllowedImpl) # pyright: ignore[reportAssertTypeFailure] + assert_type(any_df_impl, _EagerAllowedImpl) # pyright: ignore[reportAssertTypeFailure] + assert_type(any_ldf_impl, _LazyAllowedImpl) # pyright: ignore[reportAssertTypeFailure] - not_so_lost_df = nw.DataFrame.__new__(nw.DataFrame[IntoDataFrame]) - not_so_lost_impl = not_so_lost_df.implementation - assert_type(not_so_lost_impl, _EagerAllowedImpl) + bound_df_impl = bound_df.implementation + assert_type(bound_df_impl, _EagerAllowedImpl) + bound_ldf_impl = bound_ldf.implementation + assert_type(bound_ldf_impl, _LazyAllowedImpl) From 141b68732b5569967e8d49129f92d1874cdf4fbc Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 21 Aug 2025 19:57:59 +0000 Subject: [PATCH 14/40] test(typing): Add `DataFrame.lazy` suite Towards https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2291875889 --- tests/implementation_test.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/implementation_test.py b/tests/implementation_test.py index 460a7ed228..3ff5f3d085 100644 --- a/tests/implementation_test.py +++ b/tests/implementation_test.py @@ -164,3 +164,26 @@ def test_implementation_typing() -> None: # noqa: PLR0914, PLR0915 assert_type(bound_df_impl, _EagerAllowedImpl) bound_ldf_impl = bound_ldf.implementation assert_type(bound_ldf_impl, _LazyAllowedImpl) + + # NOTE: `DataFrame.lazy` + # [True Positive] + any_df.lazy(polars_ldf.implementation) + any_df.lazy(polars_df.implementation) + any_df.lazy(duckdb_ldf.implementation) + + # [True Negative] + any_df.lazy(pandas_df.implementation) # type: ignore[arg-type] + any_df.lazy(arrow_df.implementation) # type: ignore[arg-type] + any_df.lazy(modin_df.implementation) # pyright: ignore[reportArgumentType] + any_df.lazy(sqlframe_ldf.implementation) # type: ignore[arg-type] + any_df.lazy(bound_ldf.implementation) # type: ignore[arg-type] + any_df.lazy(bound_df.implementation) # type: ignore[arg-type] + any_df.lazy(can_lazyframe_collect_dfs[0].implementation) # type: ignore[arg-type] + + # [False Positive] + any_df.lazy(any_ldf.implementation) + any_df.lazy(any_df.implementation) + + # [False Negative] + any_df.lazy(ibis_ldf.implementation) # pyright: ignore[reportArgumentType] + any_df.lazy(dask_ldf.implementation) # pyright: ignore[reportArgumentType] From cabedd42030f8d1a20974327999330245efbcb88 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 22 Aug 2025 12:57:14 +0000 Subject: [PATCH 15/40] refactor: Prepare for `Series` support Avoids the need for runtime branching in the descriptor --- narwhals/dataframe.py | 14 +++++++++++++- narwhals/series.py | 4 ++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 52105aa339..b54d6a9a31 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -176,7 +176,7 @@ def __get__(self, instance: LazyFrame[Any], owner: Any) -> _LazyAllowedImpl: ... def __get__(self, instance: Any | None, owner: Any) -> Any: if instance is None: # pragma: no cover return self - return instance._compliant_frame._implementation + return instance._compliant._implementation class BaseFrame(Generic[_FrameT]): @@ -204,6 +204,10 @@ class BaseFrame(Generic[_FrameT]): False """ + @property + @abstractmethod + def _compliant(self) -> Any: ... + def __native_namespace__(self) -> ModuleType: return self._compliant_frame.__native_namespace__() # type: ignore[no-any-return] @@ -531,6 +535,10 @@ class DataFrame(BaseFrame[DataFrameT]): _version: ClassVar[Version] = Version.MAIN + @property + def _compliant(self) -> CompliantDataFrame[Any, Any, DataFrameT, Self]: + return self._compliant_frame + def _extract_compliant(self, arg: Any) -> Any: from narwhals.expr import Expr from narwhals.series import Series @@ -2305,6 +2313,10 @@ class LazyFrame(BaseFrame[LazyFrameT]): ``` """ + @property + def _compliant(self) -> CompliantLazyFrame[Any, LazyFrameT, Self]: + return self._compliant_frame + def _extract_compliant(self, arg: Any) -> Any: from narwhals.expr import Expr from narwhals.series import Series diff --git a/narwhals/series.py b/narwhals/series.py index 0d0d36617d..1cb34bceec 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -77,6 +77,10 @@ class Series(Generic[IntoSeriesT]): _version: ClassVar[Version] = Version.MAIN + @property + def _compliant(self) -> CompliantSeries[IntoSeriesT]: + return self._compliant_series + @property def _dataframe(self) -> type[DataFrame[Any]]: from narwhals.dataframe import DataFrame From 71c5163145dd8b500b5c2f5ca726af06e0e00810 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 22 Aug 2025 12:59:00 +0000 Subject: [PATCH 16/40] extend this overload abomination will work on simplifying later --- narwhals/dataframe.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index b54d6a9a31..015d4b8494 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -78,6 +78,7 @@ _NativeDuckDB, _NativeIbis, _NativePandasLikeDataFrame, + _NativePandasLikeSeries, _NativeSQLFrame, ) from narwhals._translate import IntoArrowTable @@ -138,10 +139,14 @@ def __set_name__(self, owner: type[Any], name: str) -> None: @overload def __get__( - self, instance: DataFrame[pl.DataFrame] | LazyFrame[pl.LazyFrame], owner: Any + self, + instance: DataFrame[pl.DataFrame] | LazyFrame[pl.LazyFrame] | Series[pl.Series], + owner: Any, ) -> _PolarsImpl: ... @overload - def __get__(self, instance: BaseFrame[pd.DataFrame], owner: Any) -> _PandasImpl: ... + def __get__( + self, instance: BaseFrame[pd.DataFrame] | Series[pd.DataFrame], owner: Any + ) -> _PandasImpl: ... @overload def __get__(self, instance: BaseFrame[_ModinDataFrame], owner: Any) -> _ModinImpl: ... @@ -149,13 +154,20 @@ def __get__(self, instance: BaseFrame[_ModinDataFrame], owner: Any) -> _ModinImp def __get__(self, instance: BaseFrame[_CuDFDataFrame], owner: Any) -> _CudfImpl: ... @overload def __get__( - self, instance: BaseFrame[_NativePandasLikeDataFrame], owner: Any + self, + instance: BaseFrame[_NativePandasLikeDataFrame] | Series[_NativePandasLikeSeries], + owner: Any, ) -> _PandasLikeImpl: ... @overload - def __get__(self, instance: BaseFrame[pa.Table], owner: Any) -> _ArrowImpl: ... + def __get__( + self, instance: BaseFrame[pa.Table] | Series[pa.ChunkedArray[Any]], owner: Any + ) -> _ArrowImpl: ... @overload def __get__( - self, instance: BaseFrame[pl.DataFrame | pd.DataFrame | pa.Table], owner: Any + self, + instance: BaseFrame[pl.DataFrame | pd.DataFrame | pa.Table] + | Series[pl.Series | pd.Series[Any] | pa.ChunkedArray[Any]], + owner: Any, ) -> _PolarsImpl | _PandasImpl | _ArrowImpl: ... @overload def __get__(self, instance: LazyFrame[_NativeDuckDB], owner: Any) -> _DuckDBImpl: ... @@ -170,10 +182,16 @@ def __get__(self, instance: LazyFrame[_NativeIbis], owner: Any) -> _IbisImpl: .. @overload def __get__(self, instance: None, owner: Any) -> Self: ... @overload - def __get__(self, instance: DataFrame[Any], owner: Any) -> _EagerAllowedImpl: ... + def __get__( + self, instance: DataFrame[Any] | Series[Any], owner: Any + ) -> _EagerAllowedImpl: ... @overload def __get__(self, instance: LazyFrame[Any], owner: Any) -> _LazyAllowedImpl: ... - def __get__(self, instance: Any | None, owner: Any) -> Any: + def __get__( + self, + instance: DataFrame[Any] | LazyFrame[Any] | BaseFrame[Any] | Series[Any] | None, + owner: Any, + ) -> Any: if instance is None: # pragma: no cover return self return instance._compliant._implementation From 2b7945bf0e36d41e9519f0c6dd4802d9a4fb21f3 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 22 Aug 2025 13:06:02 +0000 Subject: [PATCH 17/40] refactor: Move `_ImplDescriptor` to `_utils` --- narwhals/_utils.py | 89 ++++++++++++++++++++++++++++++++++++++- narwhals/dataframe.py | 96 +------------------------------------------ 2 files changed, 89 insertions(+), 96 deletions(-) diff --git a/narwhals/_utils.py b/narwhals/_utils.py index 0d7aa2d13d..d476580580 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -74,16 +74,37 @@ NativeSeriesT_co, ) from narwhals._compliant.typing import EvalNames, NativeLazyFrameT - from narwhals._namespace import Namespace + from narwhals._namespace import ( + Namespace, + _CuDFDataFrame, + _ModinDataFrame, + _NativeDask, + _NativeDuckDB, + _NativeIbis, + _NativePandasLikeDataFrame, + _NativePandasLikeSeries, + _NativeSQLFrame, + ) from narwhals._translate import ArrowStreamExportable, IntoArrowTable, ToNarwhalsT_co from narwhals._typing import ( Backend, IntoBackend, + _ArrowImpl, + _CudfImpl, + _DaskImpl, _DataFrameLazyImpl, + _DuckDBImpl, _EagerAllowedImpl, + _IbisImpl, + _LazyAllowedImpl, _LazyFrameCollectImpl, + _ModinImpl, + _PandasImpl, + _PandasLikeImpl, + _PolarsImpl, + _SQLFrameImpl, ) - from narwhals.dataframe import DataFrame, LazyFrame + from narwhals.dataframe import BaseFrame, DataFrame, LazyFrame from narwhals.dtypes import DType from narwhals.series import Series from narwhals.typing import ( @@ -2032,3 +2053,67 @@ def deep_attrgetter(attr: str, *nested: str) -> attrgetter[Any]: def deep_getattr(obj: Any, name_1: str, *nested: str) -> Any: """Perform a nested attribute lookup on `obj`.""" return deep_attrgetter(name_1, *nested)(obj) + + +class _ImplDescriptor: + def __set_name__(self, owner: type[Any], name: str) -> None: + self.__name__: str = name + + @overload + def __get__( + self, + instance: DataFrame[pl.DataFrame] | LazyFrame[pl.LazyFrame] | Series[pl.Series], + owner: Any, + ) -> _PolarsImpl: ... + @overload + def __get__( + self, instance: BaseFrame[pd.DataFrame] | Series[pd.DataFrame], owner: Any + ) -> _PandasImpl: ... + @overload + def __get__(self, instance: BaseFrame[_ModinDataFrame], owner: Any) -> _ModinImpl: ... + + @overload # oof, looks like these two need their names aligned 😅 + def __get__(self, instance: BaseFrame[_CuDFDataFrame], owner: Any) -> _CudfImpl: ... + @overload + def __get__( + self, + instance: BaseFrame[_NativePandasLikeDataFrame] | Series[_NativePandasLikeSeries], + owner: Any, + ) -> _PandasLikeImpl: ... + @overload + def __get__( + self, instance: BaseFrame[pa.Table] | Series[pa.ChunkedArray[Any]], owner: Any + ) -> _ArrowImpl: ... + @overload + def __get__( + self, + instance: BaseFrame[pl.DataFrame | pd.DataFrame | pa.Table] + | Series[pl.Series | pd.Series[Any] | pa.ChunkedArray[Any]], + owner: Any, + ) -> _PolarsImpl | _PandasImpl | _ArrowImpl: ... + @overload + def __get__(self, instance: LazyFrame[_NativeDuckDB], owner: Any) -> _DuckDBImpl: ... + @overload + def __get__( + self, instance: LazyFrame[_NativeSQLFrame], owner: Any + ) -> _SQLFrameImpl: ... + @overload + def __get__(self, instance: LazyFrame[_NativeDask], owner: Any) -> _DaskImpl: ... + @overload + def __get__(self, instance: LazyFrame[_NativeIbis], owner: Any) -> _IbisImpl: ... + @overload + def __get__(self, instance: None, owner: Any) -> Self: ... + @overload + def __get__( + self, instance: DataFrame[Any] | Series[Any], owner: Any + ) -> _EagerAllowedImpl: ... + @overload + def __get__(self, instance: LazyFrame[Any], owner: Any) -> _LazyAllowedImpl: ... + def __get__( + self, + instance: DataFrame[Any] | LazyFrame[Any] | BaseFrame[Any] | Series[Any] | None, + owner: Any, + ) -> Any: + if instance is None: # pragma: no cover + return self + return instance._compliant._implementation diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 015d4b8494..30899a4023 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -26,6 +26,7 @@ from narwhals._utils import ( Implementation, Version, + _ImplDescriptor, can_dataframe_lazy, can_lazyframe_collect, check_columns_exist, @@ -71,37 +72,8 @@ from narwhals._compliant import CompliantDataFrame, CompliantLazyFrame from narwhals._compliant.typing import CompliantExprAny, EagerNamespaceAny - from narwhals._namespace import ( - _CuDFDataFrame, - _ModinDataFrame, - _NativeDask, - _NativeDuckDB, - _NativeIbis, - _NativePandasLikeDataFrame, - _NativePandasLikeSeries, - _NativeSQLFrame, - ) from narwhals._translate import IntoArrowTable - from narwhals._typing import ( - Dask, - DuckDB, - EagerAllowed, - Ibis, - IntoBackend, - Polars, - _ArrowImpl, - _CudfImpl, - _DaskImpl, - _DuckDBImpl, - _EagerAllowedImpl, - _IbisImpl, - _LazyAllowedImpl, - _ModinImpl, - _PandasImpl, - _PandasLikeImpl, - _PolarsImpl, - _SQLFrameImpl, - ) + from narwhals._typing import Dask, DuckDB, EagerAllowed, Ibis, IntoBackend, Polars from narwhals.group_by import GroupBy, LazyGroupBy from narwhals.typing import ( AsofJoinStrategy, @@ -133,70 +105,6 @@ MultiIndexSelector: TypeAlias = "_MultiIndexSelector[Series[Any]]" -class _ImplDescriptor: - def __set_name__(self, owner: type[Any], name: str) -> None: - self.__name__: str = name - - @overload - def __get__( - self, - instance: DataFrame[pl.DataFrame] | LazyFrame[pl.LazyFrame] | Series[pl.Series], - owner: Any, - ) -> _PolarsImpl: ... - @overload - def __get__( - self, instance: BaseFrame[pd.DataFrame] | Series[pd.DataFrame], owner: Any - ) -> _PandasImpl: ... - @overload - def __get__(self, instance: BaseFrame[_ModinDataFrame], owner: Any) -> _ModinImpl: ... - - @overload # oof, looks like these two need their names aligned 😅 - def __get__(self, instance: BaseFrame[_CuDFDataFrame], owner: Any) -> _CudfImpl: ... - @overload - def __get__( - self, - instance: BaseFrame[_NativePandasLikeDataFrame] | Series[_NativePandasLikeSeries], - owner: Any, - ) -> _PandasLikeImpl: ... - @overload - def __get__( - self, instance: BaseFrame[pa.Table] | Series[pa.ChunkedArray[Any]], owner: Any - ) -> _ArrowImpl: ... - @overload - def __get__( - self, - instance: BaseFrame[pl.DataFrame | pd.DataFrame | pa.Table] - | Series[pl.Series | pd.Series[Any] | pa.ChunkedArray[Any]], - owner: Any, - ) -> _PolarsImpl | _PandasImpl | _ArrowImpl: ... - @overload - def __get__(self, instance: LazyFrame[_NativeDuckDB], owner: Any) -> _DuckDBImpl: ... - @overload - def __get__( - self, instance: LazyFrame[_NativeSQLFrame], owner: Any - ) -> _SQLFrameImpl: ... - @overload - def __get__(self, instance: LazyFrame[_NativeDask], owner: Any) -> _DaskImpl: ... - @overload - def __get__(self, instance: LazyFrame[_NativeIbis], owner: Any) -> _IbisImpl: ... - @overload - def __get__(self, instance: None, owner: Any) -> Self: ... - @overload - def __get__( - self, instance: DataFrame[Any] | Series[Any], owner: Any - ) -> _EagerAllowedImpl: ... - @overload - def __get__(self, instance: LazyFrame[Any], owner: Any) -> _LazyAllowedImpl: ... - def __get__( - self, - instance: DataFrame[Any] | LazyFrame[Any] | BaseFrame[Any] | Series[Any] | None, - owner: Any, - ) -> Any: - if instance is None: # pragma: no cover - return self - return instance._compliant._implementation - - class BaseFrame(Generic[_FrameT]): _compliant_frame: Any _level: Literal["full", "lazy", "interchange"] From c573cfd61d3a662ec180f557eea00e210b424851 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 22 Aug 2025 13:06:57 +0000 Subject: [PATCH 18/40] feat(typing): Add (new) `Series.implementation` --- narwhals/series.py | 48 ++++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/narwhals/series.py b/narwhals/series.py index 1cb34bceec..d34350d1ff 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -7,6 +7,7 @@ from narwhals._utils import ( Implementation, Version, + _ImplDescriptor, _validate_rolling_arguments, ensure_type, generate_repr, @@ -229,33 +230,26 @@ def from_iterable( ) raise ValueError(msg) - @property - def implementation(self) -> Implementation: - """Return implementation of native Series. - - This can be useful when you need to use special-casing for features outside of - Narwhals' scope - for example, when dealing with pandas' Period Dtype. - - Examples: - >>> import narwhals as nw - >>> import pandas as pd - - >>> s_native = pd.Series([1, 2, 3]) - >>> s = nw.from_native(s_native, series_only=True) - - >>> s.implementation - - - >>> s.implementation.is_pandas() - True - - >>> s.implementation.is_pandas_like() - True - - >>> s.implementation.is_polars() - False - """ - return self._compliant_series._implementation + implementation: _ImplDescriptor = _ImplDescriptor() + """Return implementation of native Series. + + This can be useful when you need to use special-casing for features outside of + Narwhals' scope - for example, when dealing with pandas' Period Dtype. + + Examples: + >>> import narwhals as nw + >>> import pandas as pd + >>> s_native = pd.Series([1, 2, 3]) + >>> s = nw.from_native(s_native, series_only=True) + >>> s.implementation + + >>> s.implementation.is_pandas() + True + >>> s.implementation.is_pandas_like() + True + >>> s.implementation.is_polars() + False + """ def __bool__(self) -> NoReturn: msg = ( From 410b5bd6ffbf6c9c359bdaa2addb854bea3371e7 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 22 Aug 2025 13:18:47 +0000 Subject: [PATCH 19/40] oop --- narwhals/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_utils.py b/narwhals/_utils.py index d476580580..a60be39724 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -2067,7 +2067,7 @@ def __get__( ) -> _PolarsImpl: ... @overload def __get__( - self, instance: BaseFrame[pd.DataFrame] | Series[pd.DataFrame], owner: Any + self, instance: BaseFrame[pd.DataFrame] | Series[pd.Series[Any]], owner: Any ) -> _PandasImpl: ... @overload def __get__(self, instance: BaseFrame[_ModinDataFrame], owner: Any) -> _ModinImpl: ... From e07cbc5d40727366575ce0beeea4348b1f753fd2 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 22 Aug 2025 13:50:42 +0000 Subject: [PATCH 20/40] test(typing): Add `Series` tests --- tests/implementation_test.py | 60 +++++++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 14 deletions(-) diff --git a/tests/implementation_test.py b/tests/implementation_test.py index 3ff5f3d085..aefe3deb80 100644 --- a/tests/implementation_test.py +++ b/tests/implementation_test.py @@ -21,7 +21,7 @@ _PolarsImpl, _SQLFrameImpl, ) - from narwhals.typing import IntoDataFrame, IntoLazyFrame + from narwhals.typing import IntoDataFrame, IntoLazyFrame, IntoSeries def test_implementation_pandas() -> None: @@ -92,30 +92,43 @@ def test_implementation_typing() -> None: # noqa: PLR0914, PLR0915 data: dict[str, Any] = {"a": [1, 2, 3]} polars_df = nw.from_native(pl.DataFrame(data)) polars_ldf = nw.from_native(pl.LazyFrame(data)) + polars_ser = nw.from_native(pl.Series(data["a"]), series_only=True) pandas_df = nw.from_native(pd.DataFrame(data)) + pandas_ser = nw.from_native(pd.Series(data["a"]), series_only=True) arrow_df = nw.from_native(pa.table(data)) + # NOTE: The overloads are too complicated, simplifying to `Any` + arrow_ser_native = cast("pa.ChunkedArray[Any]", pa.chunked_array([data["a"]])) # type: ignore[redundant-cast] + arrow_ser = nw.from_native(arrow_ser_native, series_only=True) duckdb_ldf = nw.from_native(duckdb_lazy_constructor(data)) sqlframe_ldf = nw.from_native(sqlframe_pyspark_lazy_constructor(data)) ibis_ldf = nw.from_native(ibis_lazy_constructor(data)) any_df = cast("nw.DataFrame[Any]", "fake df 1") any_ldf = cast("nw.LazyFrame[Any]", "fake ldf 1") + any_ser = cast("nw.Series[Any]", "fake ser 1") bound_df = cast("nw.DataFrame[IntoDataFrame]", "fake df 2") bound_ldf = cast("nw.LazyFrame[IntoLazyFrame]", "fake ldf 2") - - polars_impl = polars_df.implementation - lazy_polars_impl = polars_ldf.implementation - pandas_impl = pandas_df.implementation - arrow_impl = arrow_df.implementation + bound_ser = cast("nw.Series[IntoSeries]", "fake ser 2") + + polars_df_impl = polars_df.implementation + polars_ldf_impl = polars_ldf.implementation + polars_ser_impl = polars_ser.implementation + pandas_df_impl = pandas_df.implementation + pandas_ser_impl = pandas_ser.implementation + arrow_df_impl = arrow_df.implementation + arrow_ser_impl = arrow_ser.implementation duckdb_impl = duckdb_ldf.implementation sqlframe_impl = sqlframe_ldf.implementation ibis_impl = ibis_ldf.implementation - assert_type(polars_impl, _PolarsImpl) - assert_type(lazy_polars_impl, _PolarsImpl) + assert_type(polars_df_impl, _PolarsImpl) + assert_type(polars_ldf_impl, _PolarsImpl) + assert_type(polars_ser_impl, _PolarsImpl) # NOTE: Testing the lazy versions of pandas/pyarrow would require adding overloads to `DataFrame.lazy` # Currently, everything becomes `LazyFrame[Any]` - assert_type(pandas_impl, _PandasImpl) - assert_type(arrow_impl, _ArrowImpl) + assert_type(pandas_df_impl, _PandasImpl) + assert_type(pandas_ser_impl, _PandasImpl) + assert_type(arrow_df_impl, _ArrowImpl) + assert_type(arrow_ser_impl, _ArrowImpl) assert_type(duckdb_impl, _DuckDBImpl) assert_type(sqlframe_impl, _SQLFrameImpl) @@ -144,26 +157,43 @@ def test_implementation_typing() -> None: # noqa: PLR0914, PLR0915 assert_type(ibis_impl, _IbisImpl) # pyright: ignore[reportAssertTypeFailure] assert_type(dask_impl, _LazyAllowedImpl) + # NOTE: Any combination of eager objects that **does not** include `cuDF`, `modin` should + # preserve that detail can_lazyframe_collect_dfs: list[ nw.DataFrame[pl.DataFrame] | nw.DataFrame[pd.DataFrame] | nw.DataFrame[pa.Table] ] = [polars_df, pandas_df, arrow_df] - can_lazyframe_collect_impl = can_lazyframe_collect_dfs[0].implementation - assert_type(can_lazyframe_collect_impl, _PolarsImpl | _PandasImpl | _ArrowImpl) + can_lazyframe_collect_dfs_impl = can_lazyframe_collect_dfs[0].implementation + assert_type( + can_lazyframe_collect_dfs_impl, _PolarsImpl | _PandasImpl | _ArrowImpl + ) + can_lazyframe_collect_sers: list[ + nw.Series[pl.Series] + | nw.Series[pd.Series[Any]] + | nw.Series[pa.ChunkedArray[Any]] + ] = [polars_ser, pandas_ser, arrow_ser] + can_lazyframe_collect_sers_impl = can_lazyframe_collect_sers[0].implementation + assert_type( + can_lazyframe_collect_sers_impl, _PolarsImpl | _PandasImpl | _ArrowImpl + ) any_df_impl = any_df.implementation any_ldf_impl = any_ldf.implementation + any_ser_impl = any_ser.implementation # TODO @dangotbanned: Is this so bad? - # - Currently `DataFrame[Any] | LazyFrame[Any]` matches the first overload (`_PolarsImpl`) + # - Currently `DataFrame[Any] | LazyFrame[Any] | Series[Any]` matches the first overload (`_PolarsImpl`) # - That is accepted **everywhere** that uses `IntoBackend` assert_type(any_df_impl, _EagerAllowedImpl) # pyright: ignore[reportAssertTypeFailure] assert_type(any_ldf_impl, _LazyAllowedImpl) # pyright: ignore[reportAssertTypeFailure] + assert_type(any_ser_impl, _EagerAllowedImpl) # pyright: ignore[reportAssertTypeFailure] bound_df_impl = bound_df.implementation - assert_type(bound_df_impl, _EagerAllowedImpl) bound_ldf_impl = bound_ldf.implementation + bound_ser_impl = bound_ser.implementation + assert_type(bound_df_impl, _EagerAllowedImpl) assert_type(bound_ldf_impl, _LazyAllowedImpl) + assert_type(bound_ser_impl, _EagerAllowedImpl) # NOTE: `DataFrame.lazy` # [True Positive] @@ -178,11 +208,13 @@ def test_implementation_typing() -> None: # noqa: PLR0914, PLR0915 any_df.lazy(sqlframe_ldf.implementation) # type: ignore[arg-type] any_df.lazy(bound_ldf.implementation) # type: ignore[arg-type] any_df.lazy(bound_df.implementation) # type: ignore[arg-type] + any_df.lazy(bound_ser.implementation) # type: ignore[arg-type] any_df.lazy(can_lazyframe_collect_dfs[0].implementation) # type: ignore[arg-type] # [False Positive] any_df.lazy(any_ldf.implementation) any_df.lazy(any_df.implementation) + any_df.lazy(any_ser.implementation) # [False Negative] any_df.lazy(ibis_ldf.implementation) # pyright: ignore[reportArgumentType] From c4bceed4ef7847f6ff291ac00bbeac1dafed706f Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 22 Aug 2025 15:14:18 +0000 Subject: [PATCH 21/40] test: Redo everything, check collect as well --- tests/implementation_test.py | 331 +++++++++++++++++++++-------------- 1 file changed, 195 insertions(+), 136 deletions(-) diff --git a/tests/implementation_test.py b/tests/implementation_test.py index aefe3deb80..67536b480a 100644 --- a/tests/implementation_test.py +++ b/tests/implementation_test.py @@ -74,148 +74,207 @@ def test_implementation_new(member: str, value: str) -> None: if TYPE_CHECKING: + import dask.dataframe as dd + import duckdb + import ibis + import modin.pandas as mpd + import pandas as pd + import polars as pl + import pyarrow as pa + from sqlframe.base.dataframe import BaseDataFrame + from typing_extensions import assert_type - def test_implementation_typing() -> None: # noqa: PLR0914, PLR0915 - import dask.dataframe as dd - import modin.pandas as mpd - import pandas as pd - import polars as pl - import pyarrow as pa - from typing_extensions import assert_type - - from tests.conftest import ( - duckdb_lazy_constructor, - ibis_lazy_constructor, - sqlframe_pyspark_lazy_constructor, - ) - - data: dict[str, Any] = {"a": [1, 2, 3]} - polars_df = nw.from_native(pl.DataFrame(data)) - polars_ldf = nw.from_native(pl.LazyFrame(data)) - polars_ser = nw.from_native(pl.Series(data["a"]), series_only=True) - pandas_df = nw.from_native(pd.DataFrame(data)) - pandas_ser = nw.from_native(pd.Series(data["a"]), series_only=True) - arrow_df = nw.from_native(pa.table(data)) - # NOTE: The overloads are too complicated, simplifying to `Any` - arrow_ser_native = cast("pa.ChunkedArray[Any]", pa.chunked_array([data["a"]])) # type: ignore[redundant-cast] - arrow_ser = nw.from_native(arrow_ser_native, series_only=True) - duckdb_ldf = nw.from_native(duckdb_lazy_constructor(data)) - sqlframe_ldf = nw.from_native(sqlframe_pyspark_lazy_constructor(data)) - ibis_ldf = nw.from_native(ibis_lazy_constructor(data)) - any_df = cast("nw.DataFrame[Any]", "fake df 1") - any_ldf = cast("nw.LazyFrame[Any]", "fake ldf 1") - any_ser = cast("nw.Series[Any]", "fake ser 1") - bound_df = cast("nw.DataFrame[IntoDataFrame]", "fake df 2") - bound_ldf = cast("nw.LazyFrame[IntoLazyFrame]", "fake ldf 2") - bound_ser = cast("nw.Series[IntoSeries]", "fake ser 2") - - polars_df_impl = polars_df.implementation - polars_ldf_impl = polars_ldf.implementation - polars_ser_impl = polars_ser.implementation - pandas_df_impl = pandas_df.implementation - pandas_ser_impl = pandas_ser.implementation - arrow_df_impl = arrow_df.implementation - arrow_ser_impl = arrow_ser.implementation - duckdb_impl = duckdb_ldf.implementation - sqlframe_impl = sqlframe_ldf.implementation - ibis_impl = ibis_ldf.implementation - - assert_type(polars_df_impl, _PolarsImpl) - assert_type(polars_ldf_impl, _PolarsImpl) - assert_type(polars_ser_impl, _PolarsImpl) - # NOTE: Testing the lazy versions of pandas/pyarrow would require adding overloads to `DataFrame.lazy` - # Currently, everything becomes `LazyFrame[Any]` - assert_type(pandas_df_impl, _PandasImpl) - assert_type(pandas_ser_impl, _PandasImpl) - assert_type(arrow_df_impl, _ArrowImpl) - assert_type(arrow_ser_impl, _ArrowImpl) - assert_type(duckdb_impl, _DuckDBImpl) - assert_type(sqlframe_impl, _SQLFrameImpl) - - modin_native = mpd.DataFrame.from_dict(data) - modin_df = nw.from_native(modin_native) - modin_impl = modin_df.implementation - # TODO @dangotbanned: Is this even possible? - # - `mypy` won't ever work, treats as `Any` - # - `pyright` can resolve `modin_df: narwhals.dataframe.DataFrame[modin.pandas.dataframe.DataFrame]` - # - But we run into variance issues if trying to widen the concrete type again - assert_type(modin_impl, _ModinImpl) # pyright: ignore[reportAssertTypeFailure] - # If ^^^ can be fixed, the next one should be removed - assert_type(modin_impl, _EagerAllowedImpl) - - # NOTE: Constructor returns `Unknown` - dask_native = cast("dd.DataFrame", dd.DataFrame.from_dict(data)) - dask_ldf = nw.from_native(dask_native) - dask_impl = dask_ldf.implementation - # NOTE: Same issue as modin - assert_type(dask_impl, _DaskImpl) # pyright: ignore[reportAssertTypeFailure] - # If ^^^ can be fixed, the next one should be removed - assert_type(dask_impl, _LazyAllowedImpl) - - # NOTE: Also same issue 🤔 - # TODO @dangotbanned: try something else instead - assert_type(ibis_impl, _IbisImpl) # pyright: ignore[reportAssertTypeFailure] - assert_type(dask_impl, _LazyAllowedImpl) + any_df: nw.DataFrame[Any] = cast("nw.DataFrame[Any]", "") + any_ldf: nw.LazyFrame[Any] = cast("nw.LazyFrame[Any]", "") + any_ser: nw.Series[Any] = cast("nw.Series[Any]", "") + bound_df: nw.DataFrame[IntoDataFrame] = cast("nw.DataFrame[IntoDataFrame]", "") + bound_ldf: nw.LazyFrame[IntoLazyFrame] = cast("nw.LazyFrame[IntoLazyFrame]", "") + bound_ser: nw.Series[IntoSeries] = cast("nw.Series[IntoSeries]", "") + + def test_polars_typing(native: pl.DataFrame) -> None: + df = nw.from_native(native) + ldf = nw.from_native(native.lazy()) + ser = nw.from_native(native.to_series(), series_only=True) + + df_impl = df.implementation + ldf_impl = ldf.implementation + ser_impl = ser.implementation - # NOTE: Any combination of eager objects that **does not** include `cuDF`, `modin` should - # preserve that detail - can_lazyframe_collect_dfs: list[ - nw.DataFrame[pl.DataFrame] - | nw.DataFrame[pd.DataFrame] - | nw.DataFrame[pa.Table] - ] = [polars_df, pandas_df, arrow_df] - can_lazyframe_collect_dfs_impl = can_lazyframe_collect_dfs[0].implementation - assert_type( - can_lazyframe_collect_dfs_impl, _PolarsImpl | _PandasImpl | _ArrowImpl - ) - can_lazyframe_collect_sers: list[ - nw.Series[pl.Series] - | nw.Series[pd.Series[Any]] - | nw.Series[pa.ChunkedArray[Any]] - ] = [polars_ser, pandas_ser, arrow_ser] - can_lazyframe_collect_sers_impl = can_lazyframe_collect_sers[0].implementation - assert_type( - can_lazyframe_collect_sers_impl, _PolarsImpl | _PandasImpl | _ArrowImpl - ) - - any_df_impl = any_df.implementation - any_ldf_impl = any_ldf.implementation - any_ser_impl = any_ser.implementation - # TODO @dangotbanned: Is this so bad? - # - Currently `DataFrame[Any] | LazyFrame[Any] | Series[Any]` matches the first overload (`_PolarsImpl`) - # - That is accepted **everywhere** that uses `IntoBackend` - assert_type(any_df_impl, _EagerAllowedImpl) # pyright: ignore[reportAssertTypeFailure] - assert_type(any_ldf_impl, _LazyAllowedImpl) # pyright: ignore[reportAssertTypeFailure] - assert_type(any_ser_impl, _EagerAllowedImpl) # pyright: ignore[reportAssertTypeFailure] - - bound_df_impl = bound_df.implementation - bound_ldf_impl = bound_ldf.implementation - bound_ser_impl = bound_ser.implementation - assert_type(bound_df_impl, _EagerAllowedImpl) - assert_type(bound_ldf_impl, _LazyAllowedImpl) - assert_type(bound_ser_impl, _EagerAllowedImpl) - - # NOTE: `DataFrame.lazy` # [True Positive] - any_df.lazy(polars_ldf.implementation) - any_df.lazy(polars_df.implementation) - any_df.lazy(duckdb_ldf.implementation) + any_df.lazy(df_impl) + any_df.lazy(ldf_impl) + any_df.lazy(ser_impl) + any_ldf.collect(df_impl) + any_ldf.collect(ldf_impl) + any_ldf.collect(ser_impl) + + assert_type(df_impl, _PolarsImpl) + assert_type(ldf_impl, _PolarsImpl) + assert_type(ser_impl, _PolarsImpl) + + def test_pandas_typing(native: pd.DataFrame) -> None: + df = nw.from_native(native) + ldf = nw.from_native(native).lazy() + ser = nw.from_native(native.iloc[0], series_only=True) + df_impl = df.implementation + ldf_impl = ldf.implementation + ser_impl = ser.implementation + + # [True Negative] + any_df.lazy(df_impl) # type: ignore[arg-type] + # [False Positive] + any_df.lazy(ldf_impl) # [True Negative] - any_df.lazy(pandas_df.implementation) # type: ignore[arg-type] - any_df.lazy(arrow_df.implementation) # type: ignore[arg-type] - any_df.lazy(modin_df.implementation) # pyright: ignore[reportArgumentType] - any_df.lazy(sqlframe_ldf.implementation) # type: ignore[arg-type] - any_df.lazy(bound_ldf.implementation) # type: ignore[arg-type] - any_df.lazy(bound_df.implementation) # type: ignore[arg-type] - any_df.lazy(bound_ser.implementation) # type: ignore[arg-type] - any_df.lazy(can_lazyframe_collect_dfs[0].implementation) # type: ignore[arg-type] + any_df.lazy(ser_impl) # pyright: ignore[reportArgumentType] + # [True Positive] + any_ldf.collect(df_impl) + any_ldf.collect(ldf_impl) + any_ldf.collect(ser_impl) + + assert_type(df_impl, _PandasImpl) + # NOTE: Would require adding overloads to `DataFrame.lazy` + assert_type(ldf_impl, _PandasImpl) # pyright: ignore[reportAssertTypeFailure] + assert_type(ser_impl, _PandasImpl) + + def test_arrow_typing(native: pa.Table) -> None: + df = nw.from_native(native) + ldf = nw.from_native(native).lazy() + ser = nw.from_native(native.column(0), series_only=True) + + df_impl = df.implementation + ldf_impl = ldf.implementation + ser_impl = ser.implementation + # [True Negative] + any_df.lazy(df_impl) # type: ignore[arg-type] # [False Positive] - any_df.lazy(any_ldf.implementation) - any_df.lazy(any_df.implementation) - any_df.lazy(any_ser.implementation) + any_df.lazy(ldf_impl) + # [True Negative] + any_df.lazy(ser_impl) # pyright: ignore[reportArgumentType] + # [True Positive] + any_ldf.collect(df_impl) + any_ldf.collect(ldf_impl) + any_ldf.collect(ser_impl) + + assert_type(df_impl, _ArrowImpl) + # NOTE: Would require adding overloads to `DataFrame.lazy` + assert_type(ldf_impl, _ArrowImpl) # pyright: ignore[reportAssertTypeFailure] + assert_type(ser_impl, _ArrowImpl) + + def test_duckdb_typing(native: duckdb.DuckDBPyRelation) -> None: + ldf = nw.from_native(native) + + ldf_impl = ldf.implementation + + # [True Positive] + any_df.lazy(ldf_impl) + # [True Negative] + any_ldf.collect(ldf_impl) # type: ignore[arg-type] + + assert_type(ldf.implementation, _DuckDBImpl) + + def test_sqlframe_typing(native: BaseDataFrame[Any, Any, Any, Any, Any]) -> None: + ldf = nw.from_native(native) + + ldf_impl = ldf.implementation + + # [True Negative] + any_df.lazy(ldf_impl) # pyright: ignore[reportArgumentType] + any_ldf.collect(ldf_impl) # pyright: ignore[reportArgumentType] + + assert_type(ldf.implementation, _SQLFrameImpl) + + def test_ibis_typing(native: ibis.Table) -> None: + ldf = nw.from_native(native) + + ldf_impl = ldf.implementation + + # [False Negative] + any_df.lazy(ldf_impl) # pyright: ignore[reportArgumentType] + # [True Negative] + any_ldf.collect(ldf_impl) # pyright: ignore[reportArgumentType] + + assert_type(ldf.implementation, _IbisImpl) # pyright: ignore[reportAssertTypeFailure] + # Fallback, remove if the above starts passing + assert_type(ldf.implementation, _LazyAllowedImpl) + + def test_dask_typing(native: dd.DataFrame) -> None: + ldf = nw.from_native(native) + + ldf_impl = ldf.implementation # [False Negative] - any_df.lazy(ibis_ldf.implementation) # pyright: ignore[reportArgumentType] - any_df.lazy(dask_ldf.implementation) # pyright: ignore[reportArgumentType] + any_df.lazy(ldf_impl) # pyright: ignore[reportArgumentType] + # [True Negative] + any_ldf.collect(ldf_impl) # pyright: ignore[reportArgumentType] + + assert_type(ldf.implementation, _DaskImpl) # pyright: ignore[reportAssertTypeFailure] + # Fallback, remove if the above starts passing + assert_type(ldf.implementation, _LazyAllowedImpl) + + def test_modin_typing(native: mpd.DataFrame) -> None: + ldf = nw.from_native(native) + + ldf_impl = ldf.implementation + + # [True Negative] + any_df.lazy(ldf_impl) # pyright: ignore[reportArgumentType] + any_ldf.collect(ldf_impl) # pyright: ignore[reportArgumentType] + + assert_type(ldf.implementation, _ModinImpl) # pyright: ignore[reportAssertTypeFailure] + # Fallback, remove if the above starts passing + assert_type(ldf.implementation, _EagerAllowedImpl) + + def test_any_typing() -> None: + df_impl = any_df.implementation + ldf_impl = any_ldf.implementation + ser_impl = any_ser.implementation + + # [False Positive] + any_df.lazy(df_impl) + any_df.lazy(ldf_impl) + any_df.lazy(ser_impl) + any_ldf.collect(df_impl) + any_ldf.collect(ldf_impl) + any_ldf.collect(ser_impl) + + assert_type(df_impl, _EagerAllowedImpl) # pyright: ignore[reportAssertTypeFailure] + assert_type(ldf_impl, _LazyAllowedImpl) # pyright: ignore[reportAssertTypeFailure] + assert_type(ser_impl, _EagerAllowedImpl) # pyright: ignore[reportAssertTypeFailure] + # Fallback, matches the first overload `_PolarsImpl` + assert_type(df_impl, _PolarsImpl) + assert_type(ldf_impl, _PolarsImpl) + assert_type(ser_impl, _PolarsImpl) + + def test_bound_typing() -> None: + df_impl = bound_df.implementation + ldf_impl = bound_ldf.implementation + ser_impl = bound_ser.implementation + + # [True Negative] + any_df.lazy(df_impl) # type: ignore[arg-type] + any_df.lazy(ldf_impl) # type: ignore[arg-type] + any_df.lazy(ser_impl) # type: ignore[arg-type] + any_ldf.collect(df_impl) # type: ignore[arg-type] + any_ldf.collect(ldf_impl) # type: ignore[arg-type] + any_ldf.collect(ser_impl) # type: ignore[arg-type] + + assert_type(df_impl, _EagerAllowedImpl) + assert_type(ldf_impl, _LazyAllowedImpl) + assert_type(ser_impl, _EagerAllowedImpl) + + def test_mixed_eager_typing( + *args: nw.DataFrame[pl.DataFrame | pd.DataFrame | pa.Table] + | nw.Series[pl.Series | pd.Series[Any] | pa.ChunkedArray[Any]], + ) -> None: + # NOTE: Any combination of eager objects that **does not** include `cuDF`, `modin` should + # preserve that detail + mix_impl = args[0].implementation + + # [True Negative] + any_df.lazy(mix_impl) # type: ignore[arg-type] + # [True Positive] + any_ldf.collect(mix_impl) + + assert_type(mix_impl, _PolarsImpl | _PandasImpl | _ArrowImpl) From eaa43c108f53fe505521d926e5c7a338b36e0979 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 23 Aug 2025 13:34:40 +0000 Subject: [PATCH 22/40] docs: Ensure `BaseFrame.implementation` shows in api ref --- docs/api-reference/dataframe.md | 1 + docs/api-reference/lazyframe.md | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/api-reference/dataframe.md b/docs/api-reference/dataframe.md index 1524464c92..34762bdddb 100644 --- a/docs/api-reference/dataframe.md +++ b/docs/api-reference/dataframe.md @@ -58,3 +58,4 @@ - write_parquet show_source: false show_bases: false + inherited_members: true diff --git a/docs/api-reference/lazyframe.md b/docs/api-reference/lazyframe.md index b27800d8d9..970a87639c 100644 --- a/docs/api-reference/lazyframe.md +++ b/docs/api-reference/lazyframe.md @@ -34,3 +34,4 @@ show_root_heading: false show_source: false show_bases: false + inherited_members: true From 5ef8103877d8cd9a51f4745dd7753cfd757a7bea Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 23 Aug 2025 15:14:22 +0000 Subject: [PATCH 23/40] fix(typing): `ibis`, `dask` work!!! --- narwhals/_utils.py | 51 ++++++++++++++++++++++-------------- tests/implementation_test.py | 33 ++++++++++++----------- 2 files changed, 49 insertions(+), 35 deletions(-) diff --git a/narwhals/_utils.py b/narwhals/_utils.py index a60be39724..f630ec1b00 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -78,11 +78,13 @@ Namespace, _CuDFDataFrame, _ModinDataFrame, + _NativeArrow, _NativeDask, _NativeDuckDB, _NativeIbis, - _NativePandasLikeDataFrame, - _NativePandasLikeSeries, + _NativePandas, + _NativePandasLike, + _NativePolars, _NativeSQLFrame, ) from narwhals._translate import ArrowStreamExportable, IntoArrowTable, ToNarwhalsT_co @@ -104,7 +106,7 @@ _PolarsImpl, _SQLFrameImpl, ) - from narwhals.dataframe import BaseFrame, DataFrame, LazyFrame + from narwhals.dataframe import DataFrame, LazyFrame from narwhals.dtypes import DType from narwhals.series import Series from narwhals.typing import ( @@ -2055,6 +2057,18 @@ def deep_getattr(obj: Any, name_1: str, *nested: str) -> Any: return deep_attrgetter(name_1, *nested)(obj) +class NarwhalsObj(Protocol[NativeT_co]): + """Minimal `BaseFrame`, `Series` protocol. + + - `to_native` scopes a covariant type parameter + - `_compliant` describes the route to `Implementation` + """ + + def to_native(self) -> NativeT_co: ... + @property + def _compliant(self) -> _StoresImplementation: ... + + class _ImplDescriptor: def __set_name__(self, owner: type[Any], name: str) -> None: self.__name__: str = name @@ -2067,40 +2081,39 @@ def __get__( ) -> _PolarsImpl: ... @overload def __get__( - self, instance: BaseFrame[pd.DataFrame] | Series[pd.Series[Any]], owner: Any + self, instance: NarwhalsObj[_NativePandas], owner: Any ) -> _PandasImpl: ... @overload - def __get__(self, instance: BaseFrame[_ModinDataFrame], owner: Any) -> _ModinImpl: ... + def __get__( + self, instance: NarwhalsObj[_ModinDataFrame], owner: Any + ) -> _ModinImpl: ... @overload # oof, looks like these two need their names aligned 😅 - def __get__(self, instance: BaseFrame[_CuDFDataFrame], owner: Any) -> _CudfImpl: ... + def __get__(self, instance: NarwhalsObj[_CuDFDataFrame], owner: Any) -> _CudfImpl: ... @overload def __get__( - self, - instance: BaseFrame[_NativePandasLikeDataFrame] | Series[_NativePandasLikeSeries], - owner: Any, + self, instance: NarwhalsObj[_NativePandasLike], owner: Any ) -> _PandasLikeImpl: ... @overload - def __get__( - self, instance: BaseFrame[pa.Table] | Series[pa.ChunkedArray[Any]], owner: Any - ) -> _ArrowImpl: ... + def __get__(self, instance: NarwhalsObj[_NativeArrow], owner: Any) -> _ArrowImpl: ... @overload def __get__( self, - instance: BaseFrame[pl.DataFrame | pd.DataFrame | pa.Table] - | Series[pl.Series | pd.Series[Any] | pa.ChunkedArray[Any]], + instance: NarwhalsObj[_NativePolars | _NativeArrow | _NativePandas], owner: Any, ) -> _PolarsImpl | _PandasImpl | _ArrowImpl: ... @overload - def __get__(self, instance: LazyFrame[_NativeDuckDB], owner: Any) -> _DuckDBImpl: ... + def __get__( + self, instance: NarwhalsObj[_NativeDuckDB], owner: Any + ) -> _DuckDBImpl: ... @overload def __get__( - self, instance: LazyFrame[_NativeSQLFrame], owner: Any + self, instance: NarwhalsObj[_NativeSQLFrame], owner: Any ) -> _SQLFrameImpl: ... @overload - def __get__(self, instance: LazyFrame[_NativeDask], owner: Any) -> _DaskImpl: ... + def __get__(self, instance: NarwhalsObj[_NativeDask], owner: Any) -> _DaskImpl: ... @overload - def __get__(self, instance: LazyFrame[_NativeIbis], owner: Any) -> _IbisImpl: ... + def __get__(self, instance: NarwhalsObj[_NativeIbis], owner: Any) -> _IbisImpl: ... @overload def __get__(self, instance: None, owner: Any) -> Self: ... @overload @@ -2111,7 +2124,7 @@ def __get__( def __get__(self, instance: LazyFrame[Any], owner: Any) -> _LazyAllowedImpl: ... def __get__( self, - instance: DataFrame[Any] | LazyFrame[Any] | BaseFrame[Any] | Series[Any] | None, + instance: DataFrame[Any] | LazyFrame[Any] | Series[Any] | NarwhalsObj[Any] | None, owner: Any, ) -> Any: if instance is None: # pragma: no cover diff --git a/tests/implementation_test.py b/tests/implementation_test.py index 67536b480a..2fab171e59 100644 --- a/tests/implementation_test.py +++ b/tests/implementation_test.py @@ -9,6 +9,7 @@ import narwhals as nw if TYPE_CHECKING: + from narwhals._namespace import _ModinDataFrame from narwhals._typing import ( _ArrowImpl, _DaskImpl, @@ -190,41 +191,41 @@ def test_ibis_typing(native: ibis.Table) -> None: ldf_impl = ldf.implementation - # [False Negative] - any_df.lazy(ldf_impl) # pyright: ignore[reportArgumentType] + # [True Positive] + any_df.lazy(ldf_impl) # [True Negative] any_ldf.collect(ldf_impl) # pyright: ignore[reportArgumentType] - assert_type(ldf.implementation, _IbisImpl) # pyright: ignore[reportAssertTypeFailure] - # Fallback, remove if the above starts passing - assert_type(ldf.implementation, _LazyAllowedImpl) + assert_type(ldf.implementation, _IbisImpl) def test_dask_typing(native: dd.DataFrame) -> None: ldf = nw.from_native(native) ldf_impl = ldf.implementation - # [False Negative] - any_df.lazy(ldf_impl) # pyright: ignore[reportArgumentType] + # [True Positive] + any_df.lazy(ldf_impl) # [True Negative] any_ldf.collect(ldf_impl) # pyright: ignore[reportArgumentType] - assert_type(ldf.implementation, _DaskImpl) # pyright: ignore[reportAssertTypeFailure] - # Fallback, remove if the above starts passing - assert_type(ldf.implementation, _LazyAllowedImpl) + assert_type(ldf.implementation, _DaskImpl) def test_modin_typing(native: mpd.DataFrame) -> None: - ldf = nw.from_native(native) + df = nw.from_native(native) - ldf_impl = ldf.implementation + df_impl = df.implementation # [True Negative] - any_df.lazy(ldf_impl) # pyright: ignore[reportArgumentType] - any_ldf.collect(ldf_impl) # pyright: ignore[reportArgumentType] + any_df.lazy(df_impl) # pyright: ignore[reportArgumentType] + any_ldf.collect(df_impl) # pyright: ignore[reportArgumentType] - assert_type(ldf.implementation, _ModinImpl) # pyright: ignore[reportAssertTypeFailure] + assert_type(df.implementation, _ModinImpl) # pyright: ignore[reportAssertTypeFailure] # Fallback, remove if the above starts passing - assert_type(ldf.implementation, _EagerAllowedImpl) + assert_type(df.implementation, _EagerAllowedImpl) + + # TODO @dangotbanned: Fix incompatible `_BasePandasLike.rename` signature + # When this ignore isn't needed - the overload to `_ModinImpl` will work + oops: _ModinDataFrame = native # pyright: ignore[reportAssignmentType] # noqa: F841 def test_any_typing() -> None: df_impl = any_df.implementation From f55cb3a80829ebf2b718a7fef14767ee10b0b47c Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 23 Aug 2025 16:02:33 +0000 Subject: [PATCH 24/40] fix(typing): Unbreak `modin` Resolves (https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2296139744) --- narwhals/_namespace.py | 13 +++++++++---- narwhals/_pandas_like/utils.py | 8 +++++--- narwhals/_utils.py | 10 ++++------ tests/implementation_test.py | 9 +-------- 4 files changed, 19 insertions(+), 21 deletions(-) diff --git a/narwhals/_namespace.py b/narwhals/_namespace.py index 14a52511e6..5b457f5aae 100644 --- a/narwhals/_namespace.py +++ b/narwhals/_namespace.py @@ -7,7 +7,6 @@ Any, Callable, Generic, - Literal, Protocol, TypeVar, cast, @@ -68,6 +67,8 @@ EagerAllowedNamespace: TypeAlias = "Namespace[PandasLikeNamespace] | Namespace[ArrowNamespace] | Namespace[PolarsNamespace]" + Incomplete: TypeAlias = Any + class _BasePandasLike(Sized, Protocol): index: Any """`mypy` doesn't like the asymmetric `property` setter in `pandas`.""" @@ -81,9 +82,13 @@ def loc(self) -> Any: ... def shape(self) -> tuple[int, ...]: ... def set_axis(self, labels: Any, *, axis: Any = ..., copy: bool = ...) -> Self: ... def copy(self, deep: bool = ...) -> Self: ... # noqa: FBT001 - def rename(self, *args: Any, inplace: Literal[False], **kwds: Any) -> Self: - """`inplace=False` is required to avoid (incorrect?) default overloads.""" - ... + def rename(self, *args: Any, **kwds: Any) -> Self | Incomplete: + """`mypy` & `pyright` disagree on overloads. + + `Incomplete` used to fix [more important issue]. + + [issue]: https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2296139744 + """ class _BasePandasLikeFrame(NativeDataFrame, _BasePandasLike, Protocol): ... diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 06ba6cdf39..9f108397f9 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -3,7 +3,7 @@ import functools import operator import re -from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar +from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar, cast import pandas as pd @@ -202,8 +202,10 @@ def rename( if implementation is Implementation.PANDAS and ( implementation._backend_version() >= (3,) ): # pragma: no cover - return obj.rename(*args, **kwargs, inplace=False) - return obj.rename(*args, **kwargs, copy=False, inplace=False) + result = obj.rename(*args, **kwargs, inplace=False) + else: + result = obj.rename(*args, **kwargs, copy=False, inplace=False) + return cast("NativeNDFrameT", result) # type: ignore[redundant-cast] @functools.lru_cache(maxsize=16) diff --git a/narwhals/_utils.py b/narwhals/_utils.py index f630ec1b00..24480f408c 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -76,12 +76,12 @@ from narwhals._compliant.typing import EvalNames, NativeLazyFrameT from narwhals._namespace import ( Namespace, - _CuDFDataFrame, - _ModinDataFrame, _NativeArrow, + _NativeCuDF, _NativeDask, _NativeDuckDB, _NativeIbis, + _NativeModin, _NativePandas, _NativePandasLike, _NativePolars, @@ -2084,12 +2084,10 @@ def __get__( self, instance: NarwhalsObj[_NativePandas], owner: Any ) -> _PandasImpl: ... @overload - def __get__( - self, instance: NarwhalsObj[_ModinDataFrame], owner: Any - ) -> _ModinImpl: ... + def __get__(self, instance: NarwhalsObj[_NativeModin], owner: Any) -> _ModinImpl: ... @overload # oof, looks like these two need their names aligned 😅 - def __get__(self, instance: NarwhalsObj[_CuDFDataFrame], owner: Any) -> _CudfImpl: ... + def __get__(self, instance: NarwhalsObj[_NativeCuDF], owner: Any) -> _CudfImpl: ... @overload def __get__( self, instance: NarwhalsObj[_NativePandasLike], owner: Any diff --git a/tests/implementation_test.py b/tests/implementation_test.py index 2fab171e59..de962758f6 100644 --- a/tests/implementation_test.py +++ b/tests/implementation_test.py @@ -9,7 +9,6 @@ import narwhals as nw if TYPE_CHECKING: - from narwhals._namespace import _ModinDataFrame from narwhals._typing import ( _ArrowImpl, _DaskImpl, @@ -219,13 +218,7 @@ def test_modin_typing(native: mpd.DataFrame) -> None: any_df.lazy(df_impl) # pyright: ignore[reportArgumentType] any_ldf.collect(df_impl) # pyright: ignore[reportArgumentType] - assert_type(df.implementation, _ModinImpl) # pyright: ignore[reportAssertTypeFailure] - # Fallback, remove if the above starts passing - assert_type(df.implementation, _EagerAllowedImpl) - - # TODO @dangotbanned: Fix incompatible `_BasePandasLike.rename` signature - # When this ignore isn't needed - the overload to `_ModinImpl` will work - oops: _ModinDataFrame = native # pyright: ignore[reportAssignmentType] # noqa: F841 + assert_type(df.implementation, _ModinImpl) def test_any_typing() -> None: df_impl = any_df.implementation From 811290c54684ef3f699c99aad24f315be9b9980f Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 23 Aug 2025 18:57:50 +0000 Subject: [PATCH 25/40] test(typing): Check `mpd.Series` too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ofc this wouldn't *just work* 🤦‍♂️ --- tests/implementation_test.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/implementation_test.py b/tests/implementation_test.py index de962758f6..4898d479a5 100644 --- a/tests/implementation_test.py +++ b/tests/implementation_test.py @@ -211,14 +211,23 @@ def test_dask_typing(native: dd.DataFrame) -> None: def test_modin_typing(native: mpd.DataFrame) -> None: df = nw.from_native(native) + # NOTE: Aribitrary method that returns a `Series` + ser = nw.from_native(native.duplicated(), series_only=True) df_impl = df.implementation + ser_impl = ser.implementation # [True Negative] any_df.lazy(df_impl) # pyright: ignore[reportArgumentType] + any_df.lazy(ser_impl) # pyright: ignore[reportArgumentType] any_ldf.collect(df_impl) # pyright: ignore[reportArgumentType] + any_ldf.collect(ser_impl) # pyright: ignore[reportArgumentType] - assert_type(df.implementation, _ModinImpl) + assert_type(df_impl, _ModinImpl) + # TODO @dangotbanned: Investigate where the match fails + assert_type(ser_impl, _ModinImpl) # pyright: ignore[reportAssertTypeFailure] + # Fallback, matches eager allowed + assert_type(ser_impl, _EagerAllowedImpl) def test_any_typing() -> None: df_impl = any_df.implementation From 012c2bf3efc6f6ffe75e3e9d72110e6433a80bd0 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 23 Aug 2025 18:58:53 +0000 Subject: [PATCH 26/40] typo --- tests/implementation_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/implementation_test.py b/tests/implementation_test.py index 4898d479a5..5cc125ad82 100644 --- a/tests/implementation_test.py +++ b/tests/implementation_test.py @@ -211,7 +211,7 @@ def test_dask_typing(native: dd.DataFrame) -> None: def test_modin_typing(native: mpd.DataFrame) -> None: df = nw.from_native(native) - # NOTE: Aribitrary method that returns a `Series` + # NOTE: Arbitrary method that returns a `Series` ser = nw.from_native(native.duplicated(), series_only=True) df_impl = df.implementation From b0694d0ac7ea2f6cf74b995079a3a88e9a8a5883 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 23 Aug 2025 19:30:24 +0000 Subject: [PATCH 27/40] fix `mpd.Series` --- narwhals/_namespace.py | 6 +++--- tests/implementation_test.py | 5 +---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/narwhals/_namespace.py b/narwhals/_namespace.py index 5b457f5aae..3a095be398 100644 --- a/narwhals/_namespace.py +++ b/narwhals/_namespace.py @@ -74,8 +74,8 @@ class _BasePandasLike(Sized, Protocol): """`mypy` doesn't like the asymmetric `property` setter in `pandas`.""" def __getitem__(self, key: Any, /) -> Any: ... - def __mul__(self, other: float | Collection[float] | Self) -> Self: ... - def __floordiv__(self, other: float | Collection[float] | Self) -> Self: ... + def __mul__(self, other: float | Collection[float] | Self, /) -> Self: ... + def __floordiv__(self, other: float | Collection[float] | Self, /) -> Self: ... @property def loc(self) -> Any: ... @property @@ -93,7 +93,7 @@ def rename(self, *args: Any, **kwds: Any) -> Self | Incomplete: class _BasePandasLikeFrame(NativeDataFrame, _BasePandasLike, Protocol): ... class _BasePandasLikeSeries(NativeSeries, _BasePandasLike, Protocol): - def where(self, cond: Any, other: Any = ..., **kwds: Any) -> Any: ... + def where(self, cond: Any, other: Any = ..., /) -> Self | Incomplete: ... class _NativeDask(NativeLazyFrame, Protocol): _partition_type: type[pd.DataFrame] diff --git a/tests/implementation_test.py b/tests/implementation_test.py index 5cc125ad82..fdfe538abf 100644 --- a/tests/implementation_test.py +++ b/tests/implementation_test.py @@ -224,10 +224,7 @@ def test_modin_typing(native: mpd.DataFrame) -> None: any_ldf.collect(ser_impl) # pyright: ignore[reportArgumentType] assert_type(df_impl, _ModinImpl) - # TODO @dangotbanned: Investigate where the match fails - assert_type(ser_impl, _ModinImpl) # pyright: ignore[reportAssertTypeFailure] - # Fallback, matches eager allowed - assert_type(ser_impl, _EagerAllowedImpl) + assert_type(ser_impl, _ModinImpl) def test_any_typing() -> None: df_impl = any_df.implementation From 2a755291d56b885bb7041355750088acc598787c Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 23 Aug 2025 19:39:31 +0000 Subject: [PATCH 28/40] chore: Add overload for pyspark --- narwhals/_utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/narwhals/_utils.py b/narwhals/_utils.py index 24480f408c..6aee57a9cd 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -85,6 +85,8 @@ _NativePandas, _NativePandasLike, _NativePolars, + _NativePySpark, + _NativePySparkConnect, _NativeSQLFrame, ) from narwhals._translate import ArrowStreamExportable, IntoArrowTable, ToNarwhalsT_co @@ -104,6 +106,8 @@ _PandasImpl, _PandasLikeImpl, _PolarsImpl, + _PySparkConnectImpl, + _PySparkImpl, _SQLFrameImpl, ) from narwhals.dataframe import DataFrame, LazyFrame @@ -2112,6 +2116,11 @@ def __get__( def __get__(self, instance: NarwhalsObj[_NativeDask], owner: Any) -> _DaskImpl: ... @overload def __get__(self, instance: NarwhalsObj[_NativeIbis], owner: Any) -> _IbisImpl: ... + # NOTE: pyspark isn't installed for typing ci + @overload + def __get__( + self, instance: NarwhalsObj[_NativePySpark | _NativePySparkConnect], owner: Any + ) -> _PySparkImpl | _PySparkConnectImpl: ... @overload def __get__(self, instance: None, owner: Any) -> Self: ... @overload From 7d42972d6969bf9794bd8974804dbd78d3f0caf2 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 23 Aug 2025 19:46:24 +0000 Subject: [PATCH 29/40] simplify, add notes --- narwhals/_utils.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/narwhals/_utils.py b/narwhals/_utils.py index 6aee57a9cd..2ff315dffb 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -2061,6 +2061,7 @@ def deep_getattr(obj: Any, name_1: str, *nested: str) -> Any: return deep_attrgetter(name_1, *nested)(obj) +# TODO @dangotbanned: Rename and give a better doc class NarwhalsObj(Protocol[NativeT_co]): """Minimal `BaseFrame`, `Series` protocol. @@ -2073,15 +2074,14 @@ def to_native(self) -> NativeT_co: ... def _compliant(self) -> _StoresImplementation: ... +# TODO @dangotbanned: Rename class _ImplDescriptor: def __set_name__(self, owner: type[Any], name: str) -> None: self.__name__: str = name @overload def __get__( - self, - instance: DataFrame[pl.DataFrame] | LazyFrame[pl.LazyFrame] | Series[pl.Series], - owner: Any, + self, instance: NarwhalsObj[_NativePolars], owner: Any ) -> _PolarsImpl: ... @overload def __get__( @@ -2090,7 +2090,7 @@ def __get__( @overload def __get__(self, instance: NarwhalsObj[_NativeModin], owner: Any) -> _ModinImpl: ... - @overload # oof, looks like these two need their names aligned 😅 + @overload # TODO @dangotbanned: Rename `_typing` `*Cudf*` aliases to `*CuDF*` def __get__(self, instance: NarwhalsObj[_NativeCuDF], owner: Any) -> _CudfImpl: ... @overload def __get__( @@ -2134,6 +2134,4 @@ def __get__( instance: DataFrame[Any] | LazyFrame[Any] | Series[Any] | NarwhalsObj[Any] | None, owner: Any, ) -> Any: - if instance is None: # pragma: no cover - return self - return instance._compliant._implementation + return self if instance is None else instance._compliant._implementation From 05d4115346b2b91175e777c38b575288f2f6963d Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 24 Aug 2025 15:03:36 +0000 Subject: [PATCH 30/40] rename, add brief doc to `_Implementation` --- narwhals/_utils.py | 8 ++++++-- narwhals/dataframe.py | 4 ++-- narwhals/series.py | 4 ++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/narwhals/_utils.py b/narwhals/_utils.py index 2ff315dffb..cd868877f9 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -2074,8 +2074,12 @@ def to_native(self) -> NativeT_co: ... def _compliant(self) -> _StoresImplementation: ... -# TODO @dangotbanned: Rename -class _ImplDescriptor: +class _Implementation: + """Descriptor for matching an opaque `Implementation` on a generic class. + + Based on [pyright comment](https://github.com/microsoft/pyright/issues/3071#issuecomment-1043978070) + """ + def __set_name__(self, owner: type[Any], name: str) -> None: self.__name__: str = name diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index ed75eb0d9a..6c04c141ef 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -26,7 +26,7 @@ from narwhals._utils import ( Implementation, Version, - _ImplDescriptor, + _Implementation, can_dataframe_lazy, can_lazyframe_collect, check_columns_exist, @@ -109,7 +109,7 @@ class BaseFrame(Generic[_FrameT]): _compliant_frame: Any _level: Literal["full", "lazy", "interchange"] - implementation: _ImplDescriptor = _ImplDescriptor() + implementation: _Implementation = _Implementation() """Return implementation of native frame. This can be useful when you need to use special-casing for features outside of diff --git a/narwhals/series.py b/narwhals/series.py index 006ed8584c..43cbc6dd13 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -7,7 +7,7 @@ from narwhals._utils import ( Implementation, Version, - _ImplDescriptor, + _Implementation, _validate_rolling_arguments, ensure_type, generate_repr, @@ -231,7 +231,7 @@ def from_iterable( ) raise ValueError(msg) - implementation: _ImplDescriptor = _ImplDescriptor() + implementation: _Implementation = _Implementation() """Return implementation of native Series. This can be useful when you need to use special-casing for features outside of From 87d4439dba20d019572e23d2cc1dfb12b63e31fe Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 24 Aug 2025 15:20:44 +0000 Subject: [PATCH 31/40] refactor: Rename `NarwhalsObj` -> `Narwhals` --- narwhals/_utils.py | 38 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/narwhals/_utils.py b/narwhals/_utils.py index cd868877f9..be21a003e7 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -2061,8 +2061,8 @@ def deep_getattr(obj: Any, name_1: str, *nested: str) -> Any: return deep_attrgetter(name_1, *nested)(obj) -# TODO @dangotbanned: Rename and give a better doc -class NarwhalsObj(Protocol[NativeT_co]): +# TODO @dangotbanned: give a better doc +class Narwhals(Protocol[NativeT_co]): """Minimal `BaseFrame`, `Series` protocol. - `to_native` scopes a covariant type parameter @@ -2084,46 +2084,38 @@ def __set_name__(self, owner: type[Any], name: str) -> None: self.__name__: str = name @overload - def __get__( - self, instance: NarwhalsObj[_NativePolars], owner: Any - ) -> _PolarsImpl: ... + def __get__(self, instance: Narwhals[_NativePolars], owner: Any) -> _PolarsImpl: ... @overload - def __get__( - self, instance: NarwhalsObj[_NativePandas], owner: Any - ) -> _PandasImpl: ... + def __get__(self, instance: Narwhals[_NativePandas], owner: Any) -> _PandasImpl: ... @overload - def __get__(self, instance: NarwhalsObj[_NativeModin], owner: Any) -> _ModinImpl: ... + def __get__(self, instance: Narwhals[_NativeModin], owner: Any) -> _ModinImpl: ... @overload # TODO @dangotbanned: Rename `_typing` `*Cudf*` aliases to `*CuDF*` - def __get__(self, instance: NarwhalsObj[_NativeCuDF], owner: Any) -> _CudfImpl: ... + def __get__(self, instance: Narwhals[_NativeCuDF], owner: Any) -> _CudfImpl: ... @overload def __get__( - self, instance: NarwhalsObj[_NativePandasLike], owner: Any + self, instance: Narwhals[_NativePandasLike], owner: Any ) -> _PandasLikeImpl: ... @overload - def __get__(self, instance: NarwhalsObj[_NativeArrow], owner: Any) -> _ArrowImpl: ... + def __get__(self, instance: Narwhals[_NativeArrow], owner: Any) -> _ArrowImpl: ... @overload def __get__( - self, - instance: NarwhalsObj[_NativePolars | _NativeArrow | _NativePandas], - owner: Any, + self, instance: Narwhals[_NativePolars | _NativeArrow | _NativePandas], owner: Any ) -> _PolarsImpl | _PandasImpl | _ArrowImpl: ... @overload - def __get__( - self, instance: NarwhalsObj[_NativeDuckDB], owner: Any - ) -> _DuckDBImpl: ... + def __get__(self, instance: Narwhals[_NativeDuckDB], owner: Any) -> _DuckDBImpl: ... @overload def __get__( - self, instance: NarwhalsObj[_NativeSQLFrame], owner: Any + self, instance: Narwhals[_NativeSQLFrame], owner: Any ) -> _SQLFrameImpl: ... @overload - def __get__(self, instance: NarwhalsObj[_NativeDask], owner: Any) -> _DaskImpl: ... + def __get__(self, instance: Narwhals[_NativeDask], owner: Any) -> _DaskImpl: ... @overload - def __get__(self, instance: NarwhalsObj[_NativeIbis], owner: Any) -> _IbisImpl: ... + def __get__(self, instance: Narwhals[_NativeIbis], owner: Any) -> _IbisImpl: ... # NOTE: pyspark isn't installed for typing ci @overload def __get__( - self, instance: NarwhalsObj[_NativePySpark | _NativePySparkConnect], owner: Any + self, instance: Narwhals[_NativePySpark | _NativePySparkConnect], owner: Any ) -> _PySparkImpl | _PySparkConnectImpl: ... @overload def __get__(self, instance: None, owner: Any) -> Self: ... @@ -2135,7 +2127,7 @@ def __get__( def __get__(self, instance: LazyFrame[Any], owner: Any) -> _LazyAllowedImpl: ... def __get__( self, - instance: DataFrame[Any] | LazyFrame[Any] | Series[Any] | NarwhalsObj[Any] | None, + instance: DataFrame[Any] | LazyFrame[Any] | Series[Any] | Narwhals[Any] | None, owner: Any, ) -> Any: return self if instance is None else instance._compliant._implementation From bee6984d8abc08d16cf1c05cd7357f036db4e083 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 24 Aug 2025 16:28:42 +0000 Subject: [PATCH 32/40] tighten up `Narwhals` w/ `Compliant` Removes the need for `to_native` --- narwhals/_utils.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/narwhals/_utils.py b/narwhals/_utils.py index be21a003e7..00ee527049 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -169,7 +169,7 @@ def columns(self) -> Sequence[str]: ... _Constructor: TypeAlias = "Callable[Concatenate[_T, P], R2]" -class _StoresNative(Protocol[NativeT_co]): # noqa: PYI046 +class _StoresNative(Protocol[NativeT_co]): """Provides access to a native object. Native objects have types like: @@ -2061,17 +2061,14 @@ def deep_getattr(obj: Any, name_1: str, *nested: str) -> Any: return deep_attrgetter(name_1, *nested)(obj) -# TODO @dangotbanned: give a better doc -class Narwhals(Protocol[NativeT_co]): - """Minimal `BaseFrame`, `Series` protocol. +class Compliant( + _StoresNative[NativeT_co], _StoresImplementation, Protocol[NativeT_co] +): ... - - `to_native` scopes a covariant type parameter - - `_compliant` describes the route to `Implementation` - """ - def to_native(self) -> NativeT_co: ... +class Narwhals(Protocol[NativeT_co]): @property - def _compliant(self) -> _StoresImplementation: ... + def _compliant(self) -> Compliant[NativeT_co]: ... class _Implementation: From 1c68c6824687a2a7f1f43e87d00b2234d004b150 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 24 Aug 2025 16:43:06 +0000 Subject: [PATCH 33/40] docs(typing): Add `Narwhals` explainer --- narwhals/_utils.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/narwhals/_utils.py b/narwhals/_utils.py index 00ee527049..86a23dc6df 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -2067,6 +2067,30 @@ class Compliant( class Narwhals(Protocol[NativeT_co]): + """Minimal *Narwhals-level* protocol. + + Provides access to a compliant object: + + obj: Narwhals[NativeT_co]] + compliant: Compliant[NativeT_co] = obj._compliant + + Which itself exposes: + + implementation: Implementation = compliant.implementation + native: NativeT_co = compliant.native + + This interface is used for revealing which `Implementation` member is associated with **either**: + - One or more [nominal] native type(s) + - One or more [structural] type(s) + - where the true native type(s) are [assignable to] *at least* one of them + + These relationships are defined in the `@overload`s of `_Implementation.__get__(...)`. + + [nominal]: https://typing.python.org/en/latest/spec/glossary.html#term-nominal + [structural]: https://typing.python.org/en/latest/spec/glossary.html#term-structural + [assignable to]: https://typing.python.org/en/latest/spec/glossary.html#term-assignable + """ + @property def _compliant(self) -> Compliant[NativeT_co]: ... From fcafec6319c598ba5c57b11399d8c48f220c88ba Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 24 Aug 2025 16:53:31 +0000 Subject: [PATCH 34/40] docs: Add crossref to `Implementation` --- narwhals/dataframe.py | 2 +- narwhals/series.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 6c04c141ef..5905387b12 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -110,7 +110,7 @@ class BaseFrame(Generic[_FrameT]): _level: Literal["full", "lazy", "interchange"] implementation: _Implementation = _Implementation() - """Return implementation of native frame. + """Return [`narwhals.Implementation`][] of native frame. This can be useful when you need to use special-casing for features outside of Narwhals' scope - for example, when dealing with pandas' Period Dtype. diff --git a/narwhals/series.py b/narwhals/series.py index 43cbc6dd13..83d7b7071a 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -232,7 +232,7 @@ def from_iterable( raise ValueError(msg) implementation: _Implementation = _Implementation() - """Return implementation of native Series. + """Return [`narwhals.Implementation`][] of native Series. This can be useful when you need to use special-casing for features outside of Narwhals' scope - for example, when dealing with pandas' Period Dtype. From 7157bbdc4a9d952ab99ef34ecdecd326103a30a8 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 24 Aug 2025 17:08:26 +0000 Subject: [PATCH 35/40] refactor: shrinking --- narwhals/_namespace.py | 8 ++------ narwhals/_utils.py | 8 +------- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/narwhals/_namespace.py b/narwhals/_namespace.py index 3a095be398..ffd7a79390 100644 --- a/narwhals/_namespace.py +++ b/narwhals/_namespace.py @@ -66,7 +66,6 @@ _Guard: TypeAlias = "Callable[[Any], TypeIs[T]]" EagerAllowedNamespace: TypeAlias = "Namespace[PandasLikeNamespace] | Namespace[ArrowNamespace] | Namespace[PolarsNamespace]" - Incomplete: TypeAlias = Any class _BasePandasLike(Sized, Protocol): @@ -85,9 +84,7 @@ def copy(self, deep: bool = ...) -> Self: ... # noqa: FBT001 def rename(self, *args: Any, **kwds: Any) -> Self | Incomplete: """`mypy` & `pyright` disagree on overloads. - `Incomplete` used to fix [more important issue]. - - [issue]: https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2296139744 + `Incomplete` used to fix [more important issue](https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2296139744). """ class _BasePandasLikeFrame(NativeDataFrame, _BasePandasLike, Protocol): ... @@ -118,8 +115,7 @@ class _ModinSeries(_BasePandasLikeSeries, Protocol): # NOTE: Using `pyspark.sql.DataFrame` creates false positives in overloads when not installed class _PySparkDataFrame(NativeLazyFrame, Protocol): - # Not on sqlframe classes - # Insane method name that no other framework would clobber + # Arbitrary method that `sqlframe` doesn't have and unlikely to appear anywhere else # https://github.com/apache/spark/blob/8530444e25b83971da4314c608aa7d763adeceb3/python/pyspark/sql/dataframe.py#L4875 def dropDuplicatesWithinWatermark(self, *arg: Any, **kwargs: Any) -> Any: ... # noqa: N802 diff --git a/narwhals/_utils.py b/narwhals/_utils.py index 86a23dc6df..29d00d0472 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -2110,7 +2110,6 @@ def __get__(self, instance: Narwhals[_NativePolars], owner: Any) -> _PolarsImpl: def __get__(self, instance: Narwhals[_NativePandas], owner: Any) -> _PandasImpl: ... @overload def __get__(self, instance: Narwhals[_NativeModin], owner: Any) -> _ModinImpl: ... - @overload # TODO @dangotbanned: Rename `_typing` `*Cudf*` aliases to `*CuDF*` def __get__(self, instance: Narwhals[_NativeCuDF], owner: Any) -> _CudfImpl: ... @overload @@ -2133,7 +2132,6 @@ def __get__( def __get__(self, instance: Narwhals[_NativeDask], owner: Any) -> _DaskImpl: ... @overload def __get__(self, instance: Narwhals[_NativeIbis], owner: Any) -> _IbisImpl: ... - # NOTE: pyspark isn't installed for typing ci @overload def __get__( self, instance: Narwhals[_NativePySpark | _NativePySparkConnect], owner: Any @@ -2146,9 +2144,5 @@ def __get__( ) -> _EagerAllowedImpl: ... @overload def __get__(self, instance: LazyFrame[Any], owner: Any) -> _LazyAllowedImpl: ... - def __get__( - self, - instance: DataFrame[Any] | LazyFrame[Any] | Series[Any] | Narwhals[Any] | None, - owner: Any, - ) -> Any: + def __get__(self, instance: Narwhals[Any] | None, owner: Any) -> Any: return self if instance is None else instance._compliant._implementation From 5049a2a015009d6afb25c4bb39b944db2ed1ff4d Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 26 Aug 2025 12:11:34 +0000 Subject: [PATCH 36/40] docs: Explain typing test structure > I would add an extra line of comment, the following would be enough I guess: Woops https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2299044974 --- tests/implementation_test.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/implementation_test.py b/tests/implementation_test.py index fdfe538abf..40dfd41216 100644 --- a/tests/implementation_test.py +++ b/tests/implementation_test.py @@ -73,6 +73,41 @@ def test_implementation_new(member: str, value: str) -> None: assert nw.Implementation(value) is getattr(nw.Implementation, member) +_TYPING_ONLY_TESTS = "_" +"""Exhaustive checks for overload matching native -> implementation. + +## Arrange +Each test defines a function accepting a `native` frame. + +Important: + We *must* use the concrete types and therefore the type checker *needs* the package installed. + +Next, wrap `native` as a `nw.{Data,Lazy}Frame`. + +Note: + If we support multiple native types, use `native` to generate `nw.{LazyFrame,Series}` as well. + +Finally, look-up `.implementation` on all wrapped objects. + +## Act +Try passing every result (`*_impl`) to functions that *only* accept a **subset** of `Implementation`. + +This step *may require* a `# (type|pyright): ignore` directive, which defines the `# [... Negative]` result. +Otherwise, results are labelled with `# [... Positive]`. + +If this *static* label matches *runtime* we use `# [True ...]`, otherwise `# [False ...]`. + +Tip: + `# [False Negative]`s are the most frustrating for users. + Always try to minimize warning on safe code. + +## Assert +The action determined whether or not our typing warns on an `@overload` match. + +We still need to use [`assert_type`] to verify which `Implementation`(s) were returned as a result. + +[`assert_type`]: https://typing-extensions.readthedocs.io/en/latest/#typing_extensions.assert_type +""" if TYPE_CHECKING: import dask.dataframe as dd import duckdb From 4b78837d3919a81ef5dc3c4b2bbaac4ef605a75f Mon Sep 17 00:00:00 2001 From: Dan Redding <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 26 Aug 2025 21:34:26 +0000 Subject: [PATCH 37/40] Update narwhals/_utils.py --- narwhals/_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/narwhals/_utils.py b/narwhals/_utils.py index 29d00d0472..e32682e4a1 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -2136,8 +2136,9 @@ def __get__(self, instance: Narwhals[_NativeIbis], owner: Any) -> _IbisImpl: ... def __get__( self, instance: Narwhals[_NativePySpark | _NativePySparkConnect], owner: Any ) -> _PySparkImpl | _PySparkConnectImpl: ... + # NOTE: https://docs.python.org/3/howto/descriptor.html#invocation-from-a-class @overload - def __get__(self, instance: None, owner: Any) -> Self: ... + def __get__(self, instance: None, owner: type[Narwhals[Any]]) -> Self: ... @overload def __get__( self, instance: DataFrame[Any] | Series[Any], owner: Any From a94a0f8eecda63d606cad575f2593ae71bee1f49 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 27 Aug 2025 18:47:48 +0000 Subject: [PATCH 38/40] test(typing): Update for (#3032) https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2304969848 --- tests/implementation_test.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/implementation_test.py b/tests/implementation_test.py index 40dfd41216..1915e112d0 100644 --- a/tests/implementation_test.py +++ b/tests/implementation_test.py @@ -214,8 +214,9 @@ def test_sqlframe_typing(native: BaseDataFrame[Any, Any, Any, Any, Any]) -> None ldf_impl = ldf.implementation + # [True Positive] + any_df.lazy(ldf_impl) # [True Negative] - any_df.lazy(ldf_impl) # pyright: ignore[reportArgumentType] any_ldf.collect(ldf_impl) # pyright: ignore[reportArgumentType] assert_type(ldf.implementation, _SQLFrameImpl) @@ -289,7 +290,9 @@ def test_bound_typing() -> None: # [True Negative] any_df.lazy(df_impl) # type: ignore[arg-type] - any_df.lazy(ldf_impl) # type: ignore[arg-type] + # [True Positive] + any_df.lazy(ldf_impl) + # [True Negative] any_df.lazy(ser_impl) # type: ignore[arg-type] any_ldf.collect(df_impl) # type: ignore[arg-type] any_ldf.collect(ldf_impl) # type: ignore[arg-type] From 791ecaea14dddc9b6e41408beb3787e160b36b20 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 28 Aug 2025 15:10:22 +0000 Subject: [PATCH 39/40] ci: Exclude `OrderedDict` methods from `check-api-reference` Check added in #2957 --- utils/check_api_reference.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/utils/check_api_reference.py b/utils/check_api_reference.py index f0fc72c78a..3233b24b1c 100644 --- a/utils/check_api_reference.py +++ b/utils/check_api_reference.py @@ -5,7 +5,7 @@ import sys # ruff: noqa: N806 -from collections import deque +from collections import OrderedDict, deque from inspect import isfunction, ismethoddescriptor from pathlib import Path from types import MethodType, ModuleType @@ -223,7 +223,11 @@ def read_documented_members(source: str | Path) -> list[str]: # Schema schema_methods = list(iter_api_reference_names(nw.Schema)) documented = read_documented_members(DIR_API_REF / "schema.md") -if missing := set(schema_methods).difference(documented): +if ( + missing := set(schema_methods) + .difference(documented) + .difference(iter_api_reference_names(OrderedDict)) +): print("Schema: not documented") # noqa: T201 print(missing) # noqa: T201 ret = 1 From 21800fee152d9b27cb98ec01a6dee8126cd17ca4 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 3 Sep 2025 10:00:14 +0000 Subject: [PATCH 40/40] ci: Try adding `--group 'typing-ci'` Maybe solution for https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2318016460 --- .github/workflows/typing.yml | 2 +- Makefile | 2 +- pyproject.toml | 6 +++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/typing.yml b/.github/workflows/typing.yml index 43c81d9a2f..33532531cd 100644 --- a/.github/workflows/typing.yml +++ b/.github/workflows/typing.yml @@ -31,7 +31,7 @@ jobs: run: uv venv .venv - name: install-reqs # TODO: add more dependencies/backends incrementally - run: uv pip install -e ".[pyspark]" --group core --group typing + run: uv pip install -e ".[pyspark]" --group core --group typing-ci - name: show-deps run: uv pip freeze - name: Run mypy and pyright diff --git a/Makefile b/Makefile index 44c744614f..c404003756 100644 --- a/Makefile +++ b/Makefile @@ -20,6 +20,6 @@ help: ## Display this help screen .PHONY: typing typing: ## Run typing checks - $(VENV_BIN)/uv pip install -e . --group typing + $(VENV_BIN)/uv pip install -e . --group typing-ci $(VENV_BIN)/pyright $(VENV_BIN)/mypy diff --git a/pyproject.toml b/pyproject.toml index 94219da94c..303e0a0bf4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,7 +74,11 @@ typing = [ # keep some of these pinned and bump periodically so there's fewer s "sqlframe", "polars==1.32.2", "uv", - "narwhals[ibis,dask,modin]", + "narwhals[ibis]", +] +typing-ci = [ + "narwhals[dask,modin]", + {include-group = "typing"} ] docs = [ "black", # required by mkdocstrings_handlers