From 028098e4b5091f73fca2f812830d54f109875777 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Thu, 20 Feb 2025 18:39:59 +0000
Subject: [PATCH 01/55] refactor: replace `lambda df: df.columns`

https://github.com/narwhals-dev/narwhals/pull/2058#discussion_r1964137667
---
 narwhals/_arrow/namespace.py       |  3 ++-
 narwhals/_arrow/selectors.py       |  3 ++-
 narwhals/_dask/namespace.py        |  3 ++-
 narwhals/_dask/selectors.py        |  3 ++-
 narwhals/_duckdb/namespace.py      |  3 ++-
 narwhals/_duckdb/selectors.py      |  3 ++-
 narwhals/_pandas_like/namespace.py |  3 ++-
 narwhals/_pandas_like/selectors.py |  3 ++-
 narwhals/_spark_like/namespace.py  |  3 ++-
 narwhals/_spark_like/selectors.py  |  3 ++-
 narwhals/utils.py                  | 10 ++++++++++
 11 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py
index f11e69af8e..1efbb69751 100644
--- a/narwhals/_arrow/namespace.py
+++ b/narwhals/_arrow/namespace.py
@@ -26,6 +26,7 @@
 from narwhals._expression_parsing import combine_evaluate_output_names
 from narwhals.typing import CompliantNamespace
 from narwhals.utils import Implementation
+from narwhals.utils import get_columns
 from narwhals.utils import import_dtypes_module
 from narwhals.utils import is_compliant_expr
 
@@ -159,7 +160,7 @@ def all(self: Self) -> ArrowExpr:
             ],
             depth=0,
             function_name="all",
-            evaluate_output_names=lambda df: df.columns,
+            evaluate_output_names=get_columns,
             alias_output_names=None,
             backend_version=self._backend_version,
             version=self._version,
diff --git a/narwhals/_arrow/selectors.py b/narwhals/_arrow/selectors.py
index ec045c9e15..adf2374568 100644
--- a/narwhals/_arrow/selectors.py
+++ b/narwhals/_arrow/selectors.py
@@ -10,6 +10,7 @@
 from narwhals._arrow.expr import ArrowExpr
 from narwhals.utils import _parse_time_unit_and_time_zone
 from narwhals.utils import dtype_matches_time_unit_and_time_zone
+from narwhals.utils import get_columns
 from narwhals.utils import import_dtypes_module
 
 if TYPE_CHECKING:
@@ -82,7 +83,7 @@ def all(self: Self) -> ArrowSelector:
         def func(df: ArrowDataFrame) -> list[ArrowSeries]:
             return [df[col] for col in df.columns]
 
-        return selector(self, func, lambda df: df.columns)
+        return selector(self, func, get_columns)
 
     def datetime(
         self: Self,
diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py
index a8c3e41798..28fcd8c07f 100644
--- a/narwhals/_dask/namespace.py
+++ b/narwhals/_dask/namespace.py
@@ -24,6 +24,7 @@
 from narwhals._expression_parsing import combine_alias_output_names
 from narwhals._expression_parsing import combine_evaluate_output_names
 from narwhals.typing import CompliantNamespace
+from narwhals.utils import get_columns
 from narwhals.utils import is_compliant_expr
 
 if TYPE_CHECKING:
@@ -57,7 +58,7 @@ def func(df: DaskLazyFrame) -> list[dx.Series]:
             func,
             depth=0,
             function_name="all",
-            evaluate_output_names=lambda df: df.columns,
+            evaluate_output_names=get_columns,
             alias_output_names=None,
             backend_version=self._backend_version,
             version=self._version,
diff --git a/narwhals/_dask/selectors.py b/narwhals/_dask/selectors.py
index 123da1212d..d1ec3d4a69 100644
--- a/narwhals/_dask/selectors.py
+++ b/narwhals/_dask/selectors.py
@@ -10,6 +10,7 @@
 from narwhals._dask.expr import DaskExpr
 from narwhals.utils import _parse_time_unit_and_time_zone
 from narwhals.utils import dtype_matches_time_unit_and_time_zone
+from narwhals.utils import get_columns
 from narwhals.utils import import_dtypes_module
 
 if TYPE_CHECKING:
@@ -95,7 +96,7 @@ def all(self: Self) -> DaskSelector:
         def func(df: DaskLazyFrame) -> list[dx.Series]:
             return [df._native_frame[col] for col in df.columns]
 
-        return selector(self, func, lambda df: df.columns)
+        return selector(self, func, get_columns)
 
     def datetime(
         self: Self,
diff --git a/narwhals/_duckdb/namespace.py b/narwhals/_duckdb/namespace.py
index 2f50a70724..3acf9a2c85 100644
--- a/narwhals/_duckdb/namespace.py
+++ b/narwhals/_duckdb/namespace.py
@@ -24,6 +24,7 @@
 from narwhals._expression_parsing import combine_alias_output_names
 from narwhals._expression_parsing import combine_evaluate_output_names
 from narwhals.typing import CompliantNamespace
+from narwhals.utils import get_columns
 
 if TYPE_CHECKING:
     import duckdb
@@ -52,7 +53,7 @@ def _all(df: DuckDBLazyFrame) -> list[duckdb.Expression]:
         return DuckDBExpr(
             call=_all,
             function_name="all",
-            evaluate_output_names=lambda df: df.columns,
+            evaluate_output_names=get_columns,
             alias_output_names=None,
             backend_version=self._backend_version,
             version=self._version,
diff --git a/narwhals/_duckdb/selectors.py b/narwhals/_duckdb/selectors.py
index a30cec06cb..1617d60c56 100644
--- a/narwhals/_duckdb/selectors.py
+++ b/narwhals/_duckdb/selectors.py
@@ -12,6 +12,7 @@
 from narwhals._duckdb.expr import DuckDBExpr
 from narwhals.utils import _parse_time_unit_and_time_zone
 from narwhals.utils import dtype_matches_time_unit_and_time_zone
+from narwhals.utils import get_columns
 from narwhals.utils import import_dtypes_module
 
 if TYPE_CHECKING:
@@ -88,7 +89,7 @@ def all(self: Self) -> DuckDBSelector:
         def func(df: DuckDBLazyFrame) -> list[duckdb.Expression]:
             return [ColumnExpression(col) for col in df.columns]
 
-        return selector(self, func, lambda df: df.columns)
+        return selector(self, func, get_columns)
 
     def datetime(
         self: Self,
diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py
index a0c8a3ac3c..3d7b2514c2 100644
--- a/narwhals/_pandas_like/namespace.py
+++ b/narwhals/_pandas_like/namespace.py
@@ -22,6 +22,7 @@
 from narwhals._pandas_like.utils import horizontal_concat
 from narwhals._pandas_like.utils import vertical_concat
 from narwhals.typing import CompliantNamespace
+from narwhals.utils import get_columns
 from narwhals.utils import import_dtypes_module
 from narwhals.utils import is_compliant_expr
 
@@ -134,7 +135,7 @@ def all(self: Self) -> PandasLikeExpr:
             ],
             depth=0,
             function_name="all",
-            evaluate_output_names=lambda df: df.columns,
+            evaluate_output_names=get_columns,
             alias_output_names=None,
             implementation=self._implementation,
             backend_version=self._backend_version,
diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py
index 4b7a2ef2e0..c31e351916 100644
--- a/narwhals/_pandas_like/selectors.py
+++ b/narwhals/_pandas_like/selectors.py
@@ -10,6 +10,7 @@
 from narwhals._pandas_like.expr import PandasLikeExpr
 from narwhals.utils import _parse_time_unit_and_time_zone
 from narwhals.utils import dtype_matches_time_unit_and_time_zone
+from narwhals.utils import get_columns
 from narwhals.utils import import_dtypes_module
 
 if TYPE_CHECKING:
@@ -83,7 +84,7 @@ def all(self: Self) -> PandasSelector:
         def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
             return [df[col] for col in df.columns]
 
-        return selector(self, func, lambda df: df.columns)
+        return selector(self, func, get_columns)
 
     def datetime(
         self: Self,
diff --git a/narwhals/_spark_like/namespace.py b/narwhals/_spark_like/namespace.py
index b6f51a60f8..ac779730e8 100644
--- a/narwhals/_spark_like/namespace.py
+++ b/narwhals/_spark_like/namespace.py
@@ -17,6 +17,7 @@
 from narwhals._spark_like.utils import maybe_evaluate_expr
 from narwhals._spark_like.utils import narwhals_to_native_dtype
 from narwhals.typing import CompliantNamespace
+from narwhals.utils import get_columns
 
 if TYPE_CHECKING:
     from pyspark.sql import Column
@@ -51,7 +52,7 @@ def _all(df: SparkLikeLazyFrame) -> list[Column]:
         return SparkLikeExpr(
             call=_all,
             function_name="all",
-            evaluate_output_names=lambda df: df.columns,
+            evaluate_output_names=get_columns,
             alias_output_names=None,
             backend_version=self._backend_version,
             version=self._version,
diff --git a/narwhals/_spark_like/selectors.py b/narwhals/_spark_like/selectors.py
index e037e1f8a3..9aedceab3a 100644
--- a/narwhals/_spark_like/selectors.py
+++ b/narwhals/_spark_like/selectors.py
@@ -10,6 +10,7 @@
 from narwhals._spark_like.expr import SparkLikeExpr
 from narwhals.utils import _parse_time_unit_and_time_zone
 from narwhals.utils import dtype_matches_time_unit_and_time_zone
+from narwhals.utils import get_columns
 from narwhals.utils import import_dtypes_module
 
 if TYPE_CHECKING:
@@ -83,7 +84,7 @@ def all(self: Self) -> SparkLikeSelector:
         def func(df: SparkLikeLazyFrame) -> list[Column]:
             return [df._F.col(col) for col in df.columns]
 
-        return selector(self, func, lambda df: df.columns)
+        return selector(self, func, get_columns)
 
     def datetime(
         self: Self,
diff --git a/narwhals/utils.py b/narwhals/utils.py
index cb33a603d6..6167e73207 100644
--- a/narwhals/utils.py
+++ b/narwhals/utils.py
@@ -10,6 +10,7 @@
 from typing import TYPE_CHECKING
 from typing import Any
 from typing import Iterable
+from typing import Iterator
 from typing import Sequence
 from typing import TypeVar
 from typing import Union
@@ -59,6 +60,7 @@
     from narwhals.typing import DataFrameLike
     from narwhals.typing import DTypes
     from narwhals.typing import IntoSeriesT
+    from narwhals.typing import NativeFrame
     from narwhals.typing import SizeUnit
     from narwhals.typing import SupportsNativeNamespace
     from narwhals.typing import TimeUnit
@@ -1303,6 +1305,14 @@ def dtype_matches_time_unit_and_time_zone(
     )
 
 
+def get_columns(df: NativeFrame) -> Sequence[str]:
+    return df.columns
+
+
+def iter_columns(df: NativeFrame) -> Iterator[str]:  # pragma: no cover
+    yield from df.columns
+
+
 def _hasattr_static(obj: Any, attr: str) -> bool:
     sentinel = object()
     return getattr_static(obj, attr, sentinel) is not sentinel

From c597ba7c2b06b1d1db29e9b7f254aa3be47b5d86 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Thu, 20 Feb 2025 21:30:05 +0000
Subject: [PATCH 02/55] refactor: rename `get_columns` -> `get_column_names`

`get_column_names` is less ambiguous than `get_columns`. The name `iter_columns` is better reserved for an equivalent of https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.iter_columns.html
---
 narwhals/_arrow/namespace.py       | 4 ++--
 narwhals/_arrow/selectors.py       | 4 ++--
 narwhals/_dask/namespace.py        | 4 ++--
 narwhals/_dask/selectors.py        | 4 ++--
 narwhals/_duckdb/namespace.py      | 4 ++--
 narwhals/_duckdb/selectors.py      | 4 ++--
 narwhals/_pandas_like/namespace.py | 4 ++--
 narwhals/_pandas_like/selectors.py | 4 ++--
 narwhals/_spark_like/namespace.py  | 4 ++--
 narwhals/_spark_like/selectors.py  | 4 ++--
 narwhals/utils.py                  | 2 +-
 11 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py
index 1efbb69751..28c22ab62f 100644
--- a/narwhals/_arrow/namespace.py
+++ b/narwhals/_arrow/namespace.py
@@ -26,7 +26,7 @@
 from narwhals._expression_parsing import combine_evaluate_output_names
 from narwhals.typing import CompliantNamespace
 from narwhals.utils import Implementation
-from narwhals.utils import get_columns
+from narwhals.utils import get_column_names
 from narwhals.utils import import_dtypes_module
 from narwhals.utils import is_compliant_expr
 
@@ -160,7 +160,7 @@ def all(self: Self) -> ArrowExpr:
             ],
             depth=0,
             function_name="all",
-            evaluate_output_names=get_columns,
+            evaluate_output_names=get_column_names,
             alias_output_names=None,
             backend_version=self._backend_version,
             version=self._version,
diff --git a/narwhals/_arrow/selectors.py b/narwhals/_arrow/selectors.py
index adf2374568..ef2a760364 100644
--- a/narwhals/_arrow/selectors.py
+++ b/narwhals/_arrow/selectors.py
@@ -10,7 +10,7 @@
 from narwhals._arrow.expr import ArrowExpr
 from narwhals.utils import _parse_time_unit_and_time_zone
 from narwhals.utils import dtype_matches_time_unit_and_time_zone
-from narwhals.utils import get_columns
+from narwhals.utils import get_column_names
 from narwhals.utils import import_dtypes_module
 
 if TYPE_CHECKING:
@@ -83,7 +83,7 @@ def all(self: Self) -> ArrowSelector:
         def func(df: ArrowDataFrame) -> list[ArrowSeries]:
             return [df[col] for col in df.columns]
 
-        return selector(self, func, get_columns)
+        return selector(self, func, get_column_names)
 
     def datetime(
         self: Self,
diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py
index 28fcd8c07f..5777fd6d5d 100644
--- a/narwhals/_dask/namespace.py
+++ b/narwhals/_dask/namespace.py
@@ -24,7 +24,7 @@
 from narwhals._expression_parsing import combine_alias_output_names
 from narwhals._expression_parsing import combine_evaluate_output_names
 from narwhals.typing import CompliantNamespace
-from narwhals.utils import get_columns
+from narwhals.utils import get_column_names
 from narwhals.utils import is_compliant_expr
 
 if TYPE_CHECKING:
@@ -58,7 +58,7 @@ def func(df: DaskLazyFrame) -> list[dx.Series]:
             func,
             depth=0,
             function_name="all",
-            evaluate_output_names=get_columns,
+            evaluate_output_names=get_column_names,
             alias_output_names=None,
             backend_version=self._backend_version,
             version=self._version,
diff --git a/narwhals/_dask/selectors.py b/narwhals/_dask/selectors.py
index d1ec3d4a69..599c8835cb 100644
--- a/narwhals/_dask/selectors.py
+++ b/narwhals/_dask/selectors.py
@@ -10,7 +10,7 @@
 from narwhals._dask.expr import DaskExpr
 from narwhals.utils import _parse_time_unit_and_time_zone
 from narwhals.utils import dtype_matches_time_unit_and_time_zone
-from narwhals.utils import get_columns
+from narwhals.utils import get_column_names
 from narwhals.utils import import_dtypes_module
 
 if TYPE_CHECKING:
@@ -96,7 +96,7 @@ def all(self: Self) -> DaskSelector:
         def func(df: DaskLazyFrame) -> list[dx.Series]:
             return [df._native_frame[col] for col in df.columns]
 
-        return selector(self, func, get_columns)
+        return selector(self, func, get_column_names)
 
     def datetime(
         self: Self,
diff --git a/narwhals/_duckdb/namespace.py b/narwhals/_duckdb/namespace.py
index 3acf9a2c85..56b3bf4a4f 100644
--- a/narwhals/_duckdb/namespace.py
+++ b/narwhals/_duckdb/namespace.py
@@ -24,7 +24,7 @@
 from narwhals._expression_parsing import combine_alias_output_names
 from narwhals._expression_parsing import combine_evaluate_output_names
 from narwhals.typing import CompliantNamespace
-from narwhals.utils import get_columns
+from narwhals.utils import get_column_names
 
 if TYPE_CHECKING:
     import duckdb
@@ -53,7 +53,7 @@ def _all(df: DuckDBLazyFrame) -> list[duckdb.Expression]:
         return DuckDBExpr(
             call=_all,
             function_name="all",
-            evaluate_output_names=get_columns,
+            evaluate_output_names=get_column_names,
             alias_output_names=None,
             backend_version=self._backend_version,
             version=self._version,
diff --git a/narwhals/_duckdb/selectors.py b/narwhals/_duckdb/selectors.py
index 1617d60c56..254f41152a 100644
--- a/narwhals/_duckdb/selectors.py
+++ b/narwhals/_duckdb/selectors.py
@@ -12,7 +12,7 @@
 from narwhals._duckdb.expr import DuckDBExpr
 from narwhals.utils import _parse_time_unit_and_time_zone
 from narwhals.utils import dtype_matches_time_unit_and_time_zone
-from narwhals.utils import get_columns
+from narwhals.utils import get_column_names
 from narwhals.utils import import_dtypes_module
 
 if TYPE_CHECKING:
@@ -89,7 +89,7 @@ def all(self: Self) -> DuckDBSelector:
         def func(df: DuckDBLazyFrame) -> list[duckdb.Expression]:
             return [ColumnExpression(col) for col in df.columns]
 
-        return selector(self, func, get_columns)
+        return selector(self, func, get_column_names)
 
     def datetime(
         self: Self,
diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py
index 3d7b2514c2..b936be3467 100644
--- a/narwhals/_pandas_like/namespace.py
+++ b/narwhals/_pandas_like/namespace.py
@@ -22,7 +22,7 @@
 from narwhals._pandas_like.utils import horizontal_concat
 from narwhals._pandas_like.utils import vertical_concat
 from narwhals.typing import CompliantNamespace
-from narwhals.utils import get_columns
+from narwhals.utils import get_column_names
 from narwhals.utils import import_dtypes_module
 from narwhals.utils import is_compliant_expr
 
@@ -135,7 +135,7 @@ def all(self: Self) -> PandasLikeExpr:
             ],
             depth=0,
             function_name="all",
-            evaluate_output_names=get_columns,
+            evaluate_output_names=get_column_names,
             alias_output_names=None,
             implementation=self._implementation,
             backend_version=self._backend_version,
diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py
index c31e351916..0653ed2e40 100644
--- a/narwhals/_pandas_like/selectors.py
+++ b/narwhals/_pandas_like/selectors.py
@@ -10,7 +10,7 @@
 from narwhals._pandas_like.expr import PandasLikeExpr
 from narwhals.utils import _parse_time_unit_and_time_zone
 from narwhals.utils import dtype_matches_time_unit_and_time_zone
-from narwhals.utils import get_columns
+from narwhals.utils import get_column_names
 from narwhals.utils import import_dtypes_module
 
 if TYPE_CHECKING:
@@ -84,7 +84,7 @@ def all(self: Self) -> PandasSelector:
         def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
             return [df[col] for col in df.columns]
 
-        return selector(self, func, get_columns)
+        return selector(self, func, get_column_names)
 
     def datetime(
         self: Self,
diff --git a/narwhals/_spark_like/namespace.py b/narwhals/_spark_like/namespace.py
index ac779730e8..30e7fe3955 100644
--- a/narwhals/_spark_like/namespace.py
+++ b/narwhals/_spark_like/namespace.py
@@ -17,7 +17,7 @@
 from narwhals._spark_like.utils import maybe_evaluate_expr
 from narwhals._spark_like.utils import narwhals_to_native_dtype
 from narwhals.typing import CompliantNamespace
-from narwhals.utils import get_columns
+from narwhals.utils import get_column_names
 
 if TYPE_CHECKING:
     from pyspark.sql import Column
@@ -52,7 +52,7 @@ def _all(df: SparkLikeLazyFrame) -> list[Column]:
         return SparkLikeExpr(
             call=_all,
             function_name="all",
-            evaluate_output_names=get_columns,
+            evaluate_output_names=get_column_names,
             alias_output_names=None,
             backend_version=self._backend_version,
             version=self._version,
diff --git a/narwhals/_spark_like/selectors.py b/narwhals/_spark_like/selectors.py
index 9aedceab3a..95ad0407d1 100644
--- a/narwhals/_spark_like/selectors.py
+++ b/narwhals/_spark_like/selectors.py
@@ -10,7 +10,7 @@
 from narwhals._spark_like.expr import SparkLikeExpr
 from narwhals.utils import _parse_time_unit_and_time_zone
 from narwhals.utils import dtype_matches_time_unit_and_time_zone
-from narwhals.utils import get_columns
+from narwhals.utils import get_column_names
 from narwhals.utils import import_dtypes_module
 
 if TYPE_CHECKING:
@@ -84,7 +84,7 @@ def all(self: Self) -> SparkLikeSelector:
         def func(df: SparkLikeLazyFrame) -> list[Column]:
             return [df._F.col(col) for col in df.columns]
 
-        return selector(self, func, get_columns)
+        return selector(self, func, get_column_names)
 
     def datetime(
         self: Self,
diff --git a/narwhals/utils.py b/narwhals/utils.py
index 6167e73207..b5fbb28100 100644
--- a/narwhals/utils.py
+++ b/narwhals/utils.py
@@ -1305,7 +1305,7 @@ def dtype_matches_time_unit_and_time_zone(
     )
 
 
-def get_columns(df: NativeFrame) -> Sequence[str]:
+def get_column_names(df: NativeFrame) -> Sequence[str]:
     return df.columns
 
 

From d26fd813238e37305860ce9cd6be45ce99f0bb9d Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 15:51:40 +0000
Subject: [PATCH 03/55] chore(typing): add missing context for
 `CompliantDataFrame`

Column names and selecting series are the core parts of `selectors`
---
 narwhals/typing.py | 16 ++++++++++------
 narwhals/utils.py  |  2 +-
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/narwhals/typing.py b/narwhals/typing.py
index ebd89e7732..92ce71b683 100644
--- a/narwhals/typing.py
+++ b/narwhals/typing.py
@@ -49,7 +49,12 @@ def __narwhals_series__(self) -> CompliantSeries: ...
     def alias(self, name: str) -> Self: ...
 
 
-class CompliantDataFrame(Protocol):
+CompliantSeriesT_co = TypeVar(
+    "CompliantSeriesT_co", bound=CompliantSeries, covariant=True
+)
+
+
+class CompliantDataFrame(Generic[CompliantSeriesT_co], Protocol):
     def __narwhals_dataframe__(self) -> Self: ...
     def __narwhals_namespace__(self) -> Any: ...
     def simple_select(
@@ -59,6 +64,10 @@ def aggregate(self, *exprs: Any) -> Self:
         ...  # `select` where all args are aggregations or literals
         # (so, no broadcasting is necessary).
 
+    @property
+    def columns(self) -> Sequence[str]: ...
+    def get_column(self, name: str) -> CompliantSeriesT_co: ...
+
 
 class CompliantLazyFrame(Protocol):
     def __narwhals_lazyframe__(self) -> Self: ...
@@ -71,11 +80,6 @@ def aggregate(self, *exprs: Any) -> Self:
         # (so, no broadcasting is necessary).
 
 
-CompliantSeriesT_co = TypeVar(
-    "CompliantSeriesT_co", bound=CompliantSeries, covariant=True
-)
-
-
 class CompliantExpr(Protocol, Generic[CompliantSeriesT_co]):
     _implementation: Implementation
     _backend_version: tuple[int, ...]
diff --git a/narwhals/utils.py b/narwhals/utils.py
index b5fbb28100..141423d200 100644
--- a/narwhals/utils.py
+++ b/narwhals/utils.py
@@ -1305,7 +1305,7 @@ def dtype_matches_time_unit_and_time_zone(
     )
 
 
-def get_column_names(df: NativeFrame) -> Sequence[str]:
+def get_column_names(df: NativeFrame | CompliantDataFrame) -> Sequence[str]:
     return df.columns
 
 

From 230daf9e55f22b09316f71a2c4e6120909a18f1d Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 15:53:12 +0000
Subject: [PATCH 04/55] fix(typing): temp widen `.collect` from
 `CompliantDataFrame`

Will investigate later
---
 narwhals/_duckdb/dataframe.py      | 3 ++-
 narwhals/_pandas_like/dataframe.py | 3 ++-
 narwhals/_polars/dataframe.py      | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py
index b4ee8c2825..1e0972e5e8 100644
--- a/narwhals/_duckdb/dataframe.py
+++ b/narwhals/_duckdb/dataframe.py
@@ -36,6 +36,7 @@
     from narwhals._duckdb.group_by import DuckDBGroupBy
     from narwhals._duckdb.namespace import DuckDBNamespace
     from narwhals._duckdb.series import DuckDBInterchangeSeries
+    from narwhals._polars.dataframe import PolarsDataFrame
     from narwhals.dtypes import DType
 
 from narwhals.typing import CompliantLazyFrame
@@ -90,7 +91,7 @@ def collect(
         self: Self,
         backend: ModuleType | Implementation | str | None,
         **kwargs: Any,
-    ) -> CompliantDataFrame:
+    ) -> CompliantDataFrame[Any] | PolarsDataFrame:
         if backend is None or backend is Implementation.PYARROW:
             import pyarrow as pa  # ignore-banned-import
 
diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py
index 306a37fd8f..d333ee1459 100644
--- a/narwhals/_pandas_like/dataframe.py
+++ b/narwhals/_pandas_like/dataframe.py
@@ -48,6 +48,7 @@
     from narwhals._pandas_like.expr import PandasLikeExpr
     from narwhals._pandas_like.group_by import PandasLikeGroupBy
     from narwhals._pandas_like.namespace import PandasLikeNamespace
+    from narwhals._polars.dataframe import PolarsDataFrame
     from narwhals.dtypes import DType
     from narwhals.typing import SizeUnit
     from narwhals.typing import _1DArray
@@ -519,7 +520,7 @@ def collect(
         self: Self,
         backend: Implementation | None,
         **kwargs: Any,
-    ) -> CompliantDataFrame:
+    ) -> CompliantDataFrame[Any] | PolarsDataFrame:
         if backend is None:
             return PandasLikeDataFrame(
                 self._native_frame,
diff --git a/narwhals/_polars/dataframe.py b/narwhals/_polars/dataframe.py
index 070d658343..7ec7423106 100644
--- a/narwhals/_polars/dataframe.py
+++ b/narwhals/_polars/dataframe.py
@@ -465,7 +465,7 @@ def collect(
         self: Self,
         backend: Implementation | None,
         **kwargs: Any,
-    ) -> CompliantDataFrame:
+    ) -> PolarsDataFrame | CompliantDataFrame[Any]:
         try:
             result = self._native_frame.collect(**kwargs)
         except Exception as e:  # noqa: BLE001

From 2ab3305064bb093b5d98422c88b9a583c86f11a7 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 16:23:35 +0000
Subject: [PATCH 05/55] feat(typing): provide `PandasLikeSeries` to
 `PandasLikeDataFrame`

Experimenting with `pandas` only to start
---
 narwhals/_pandas_like/dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py
index d333ee1459..e235fe27ee 100644
--- a/narwhals/_pandas_like/dataframe.py
+++ b/narwhals/_pandas_like/dataframe.py
@@ -84,7 +84,7 @@
 )
 
 
-class PandasLikeDataFrame(CompliantDataFrame, CompliantLazyFrame):
+class PandasLikeDataFrame(CompliantDataFrame["PandasLikeSeries"], CompliantLazyFrame):
     # --- not in the spec ---
     def __init__(
         self: Self,

From 47816b988bf623b65471593e7fdd8295d83a7477 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 16:36:04 +0000
Subject: [PATCH 06/55] feat: Adds generic `CompliantSelector` & `Namespace`

---
 narwhals/_selectors.py | 257 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 257 insertions(+)
 create mode 100644 narwhals/_selectors.py

diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
new file mode 100644
index 0000000000..89a77f9148
--- /dev/null
+++ b/narwhals/_selectors.py
@@ -0,0 +1,257 @@
+"""Almost entirely complete, generic `selectors` implementation.
+
+- Focusing on eager-only for now
+"""
+
+from __future__ import annotations
+
+import re
+from functools import partial
+from typing import TYPE_CHECKING
+from typing import Callable
+from typing import Collection
+from typing import Generic
+from typing import Iterable
+from typing import Iterator
+from typing import Protocol
+from typing import Sequence
+from typing import TypeVar
+from typing import overload
+
+from narwhals.typing import CompliantExpr
+from narwhals.utils import _parse_time_unit_and_time_zone
+from narwhals.utils import dtype_matches_time_unit_and_time_zone
+from narwhals.utils import get_column_names
+from narwhals.utils import import_dtypes_module
+
+if TYPE_CHECKING:
+    from datetime import timezone
+
+    from typing_extensions import Self
+    from typing_extensions import TypeAlias
+    from typing_extensions import TypeIs
+
+    from narwhals.dtypes import DType
+    from narwhals.typing import CompliantDataFrame
+    from narwhals.typing import CompliantSeries
+    from narwhals.typing import TimeUnit
+    from narwhals.utils import Implementation
+    from narwhals.utils import Version
+    from narwhals.utils import _FullContext
+
+    # NOTE: Plugging the gap of this not being defined in `CompliantSeries`
+    class CompliantSeriesWithDType(CompliantSeries, Protocol):
+        @property
+        def dtype(self) -> DType: ...
+
+
+SeriesT = TypeVar("SeriesT", bound="CompliantSeriesWithDType")
+DataFrameT = TypeVar("DataFrameT", bound="CompliantDataFrame")
+SelectorOrExpr: TypeAlias = (
+    "CompliantSelector[DataFrameT, SeriesT] | CompliantExpr[SeriesT]"
+)
+EvalSeries: TypeAlias = Callable[[DataFrameT], Sequence[SeriesT]]
+EvalNames: TypeAlias = Callable[[DataFrameT], Sequence[str]]
+
+
+# NOTE: Pretty much finished generic for eager backends
+class CompliantSelectorNamespace(Generic[DataFrameT, SeriesT], Protocol):
+    _implementation: Implementation
+    _backend_version: tuple[int, ...]
+    _version: Version
+
+    # TODO @dangotbanned: push for adding to public API for `DataFrame`
+    # Only need internally, but it plugs so many holes that it must be useful beyond that
+    # https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.iter_columns.html
+    def _iter_columns(self, df: DataFrameT, /) -> Iterator[SeriesT]: ...
+
+    def _selector(
+        self,
+        context: _FullContext,
+        call: EvalSeries[DataFrameT, SeriesT],
+        evaluate_output_names: EvalNames[DataFrameT],
+        /,
+    ) -> CompliantSelector[DataFrameT, SeriesT]: ...
+
+    def _is_dtype(
+        self: CompliantSelectorNamespace[DataFrameT, SeriesT], dtype: type[DType], /
+    ) -> CompliantSelector[DataFrameT, SeriesT]:
+        def series(df: DataFrameT) -> Sequence[SeriesT]:
+            return [ser for ser in self._iter_columns(df) if isinstance(ser.dtype, dtype)]
+
+        def names(df: DataFrameT) -> Sequence[str]:
+            return [
+                ser.name for ser in self._iter_columns(df) if isinstance(ser.dtype, dtype)
+            ]
+
+        return self._selector(self, series, names)
+
+    def by_dtype(
+        self: Self, dtypes: Collection[DType | type[DType]]
+    ) -> CompliantSelector[DataFrameT, SeriesT]:
+        def series(df: DataFrameT) -> Sequence[SeriesT]:
+            return [ser for ser in self._iter_columns(df) if ser.dtype in dtypes]
+
+        def names(df: DataFrameT) -> Sequence[str]:
+            return [ser.name for ser in self._iter_columns(df) if ser.dtype in dtypes]
+
+        return self._selector(self, series, names)
+
+    def matches(self: Self, pattern: str) -> CompliantSelector[DataFrameT, SeriesT]:
+        p = re.compile(pattern)
+
+        def series(df: DataFrameT) -> Sequence[SeriesT]:
+            return [df.get_column(col) for col in df.columns if p.search(col)]
+
+        def names(df: DataFrameT) -> Sequence[str]:
+            return [col for col in df.columns if p.search(col)]
+
+        return self._selector(self, series, names)
+
+    def numeric(self: Self) -> CompliantSelector[DataFrameT, SeriesT]:
+        def series(df: DataFrameT) -> Sequence[SeriesT]:
+            return [ser for ser in self._iter_columns(df) if ser.dtype.is_numeric()]
+
+        def names(df: DataFrameT) -> Sequence[str]:
+            return [ser.name for ser in self._iter_columns(df) if ser.dtype.is_numeric()]
+
+        return self._selector(self, series, names)
+
+    def categorical(self: Self) -> CompliantSelector[DataFrameT, SeriesT]:
+        return self._is_dtype(import_dtypes_module(self._version).Categorical)
+
+    def string(self: Self) -> CompliantSelector[DataFrameT, SeriesT]:
+        return self._is_dtype(import_dtypes_module(self._version).String)
+
+    def boolean(self: Self) -> CompliantSelector[DataFrameT, SeriesT]:
+        return self._is_dtype(import_dtypes_module(self._version).Boolean)
+
+    def all(self: Self) -> CompliantSelector[DataFrameT, SeriesT]:
+        def series(df: DataFrameT) -> Sequence[SeriesT]:
+            return list(self._iter_columns(df))
+
+        return self._selector(self, series, get_column_names)
+
+    def datetime(
+        self: Self,
+        time_unit: TimeUnit | Iterable[TimeUnit] | None,
+        time_zone: str | timezone | Iterable[str | timezone | None] | None,
+    ) -> CompliantSelector[DataFrameT, SeriesT]:
+        time_units, time_zones = _parse_time_unit_and_time_zone(time_unit, time_zone)
+        matches = partial(
+            dtype_matches_time_unit_and_time_zone,
+            dtypes=import_dtypes_module(version=self._version),
+            time_units=time_units,
+            time_zones=time_zones,
+        )
+
+        def series(df: DataFrameT) -> Sequence[SeriesT]:
+            return [ser for ser in self._iter_columns(df) if matches(ser.dtype)]
+
+        def names(df: DataFrameT) -> Sequence[str]:
+            return [ser.name for ser in self._iter_columns(df) if matches(ser.dtype)]
+
+        return self._selector(self, series, names)
+
+    def __init__(self: Self, context: _FullContext, /) -> None:
+        self._implementation = context._implementation
+        self._backend_version = context._backend_version
+        self._version = context._version
+
+
+# NOTE: CompliantExpr already provides `_implementation`, `_backend_version`
+# https://github.com/narwhals-dev/narwhals/pull/2060
+class CompliantSelector(CompliantExpr[SeriesT], Generic[DataFrameT, SeriesT], Protocol):
+    _version: Version
+
+    @property
+    def selectors(self) -> CompliantSelectorNamespace[DataFrameT, SeriesT]: ...
+    def __repr__(self: Self) -> str: ...
+    def _to_expr(self: Self) -> CompliantExpr[SeriesT]: ...
+
+    def _is_selector(
+        self: Self,
+        other: Self | CompliantExpr[SeriesT],
+    ) -> TypeIs[CompliantSelector[DataFrameT, SeriesT]]:
+        return isinstance(other, type(self))
+
+    @overload
+    def __sub__(self: Self, other: Self) -> Self: ...
+    @overload
+    def __sub__(self: Self, other: CompliantExpr[SeriesT]) -> CompliantExpr[SeriesT]: ...
+    def __sub__(
+        self: Self, other: SelectorOrExpr[DataFrameT, SeriesT]
+    ) -> SelectorOrExpr[DataFrameT, SeriesT]:
+        if self._is_selector(other):
+
+            def series(df: DataFrameT) -> Sequence[SeriesT]:
+                lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
+                return [
+                    x for x, name in zip(self(df), lhs_names) if name not in rhs_names
+                ]
+
+            def names(df: DataFrameT) -> Sequence[str]:
+                lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
+                return [x for x in lhs_names if x not in rhs_names]
+
+            return self.selectors._selector(self, series, names)
+        else:
+            return self._to_expr() - other
+
+    @overload
+    def __or__(self: Self, other: Self) -> Self: ...
+    @overload
+    def __or__(self: Self, other: CompliantExpr[SeriesT]) -> CompliantExpr[SeriesT]: ...
+    def __or__(
+        self: Self, other: SelectorOrExpr[DataFrameT, SeriesT]
+    ) -> SelectorOrExpr[DataFrameT, SeriesT]:
+        if self._is_selector(other):
+
+            def names(df: DataFrameT) -> Sequence[SeriesT]:
+                lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
+                return [
+                    *(x for x, name in zip(self(df), lhs_names) if name not in rhs_names),
+                    *other(df),
+                ]
+
+            def series(df: DataFrameT) -> Sequence[str]:
+                lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
+                return [*(x for x in lhs_names if x not in rhs_names), *rhs_names]
+
+            return self.selectors._selector(self, names, series)
+        else:
+            return self._to_expr() | other
+
+    @overload
+    def __and__(self: Self, other: Self) -> Self: ...
+    @overload
+    def __and__(self: Self, other: CompliantExpr[SeriesT]) -> CompliantExpr[SeriesT]: ...
+    def __and__(
+        self: Self, other: SelectorOrExpr[DataFrameT, SeriesT]
+    ) -> SelectorOrExpr[DataFrameT, SeriesT]:
+        if self._is_selector(other):
+
+            def series(df: DataFrameT) -> Sequence[SeriesT]:
+                lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
+                return [x for x, name in zip(self(df), lhs_names) if name in rhs_names]
+
+            def names(df: DataFrameT) -> Sequence[str]:
+                lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
+                return [x for x in lhs_names if x in rhs_names]
+
+            return self.selectors._selector(self, series, names)
+        else:
+            return self._to_expr() & other
+
+    def __invert__(
+        self: Self,
+    ) -> CompliantSelector[DataFrameT, SeriesT]:
+        return self.selectors.all() - self
+
+
+# NOTE: Should probably be a `DataFrame` method
+# Using `Expr` because this doesn't require `Selector` attrs/methods
+def _eval_lhs_rhs(
+    df: CompliantDataFrame, lhs: CompliantExpr, rhs: CompliantExpr
+) -> tuple[Sequence[str], Sequence[str]]:
+    return lhs._evaluate_output_names(df), rhs._evaluate_output_names(df)

From c2e56d1405db1ed9ae4e952431b73e16807fe86e Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 16:44:58 +0000
Subject: [PATCH 07/55] feat: reimplement `_pandas_like.selectors`

All tests pass locally. Next step: port `_arrow` the same way and see whether this design holds up.
---
 narwhals/_pandas_like/selectors.py | 236 +++++++----------------------
 1 file changed, 52 insertions(+), 184 deletions(-)

diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py
index 0653ed2e40..6404a6f4b1 100644
--- a/narwhals/_pandas_like/selectors.py
+++ b/narwhals/_pandas_like/selectors.py
@@ -1,129 +1,77 @@
 from __future__ import annotations
 
-import re
+from functools import partial
 from typing import TYPE_CHECKING
 from typing import Any
-from typing import Callable
-from typing import Iterable
-from typing import Sequence
+from typing import Iterator
 
+from narwhals._pandas_like.dataframe import PandasLikeDataFrame
 from narwhals._pandas_like.expr import PandasLikeExpr
-from narwhals.utils import _parse_time_unit_and_time_zone
-from narwhals.utils import dtype_matches_time_unit_and_time_zone
-from narwhals.utils import get_column_names
-from narwhals.utils import import_dtypes_module
+from narwhals._pandas_like.series import PandasLikeSeries
+from narwhals._selectors import CompliantSelector
+from narwhals._selectors import CompliantSelectorNamespace
 
 if TYPE_CHECKING:
-    from datetime import timezone
-
     from typing_extensions import Self
 
     from narwhals._pandas_like.dataframe import PandasLikeDataFrame
     from narwhals._pandas_like.series import PandasLikeSeries
-    from narwhals.dtypes import DType
-    from narwhals.typing import TimeUnit
+    from narwhals._selectors import EvalNames
+    from narwhals._selectors import EvalSeries
+    from narwhals.utils import Version
     from narwhals.utils import _FullContext
 
 
-class PandasSelectorNamespace:
-    def __init__(self: Self, context: _FullContext, /) -> None:
-        self._implementation = context._implementation
-        self._backend_version = context._backend_version
-        self._version = context._version
-
-    def by_dtype(self: Self, dtypes: Iterable[DType | type[DType]]) -> PandasSelector:
-        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
-            return [df[col] for col in df.columns if df.schema[col] in dtypes]
-
-        def evaluate_output_names(df: PandasLikeDataFrame) -> Sequence[str]:
-            return [col for col in df.columns if df.schema[col] in dtypes]
-
-        return selector(self, func, evaluate_output_names)
+class PandasSelectorNamespace(
+    CompliantSelectorNamespace["PandasLikeDataFrame", "PandasLikeSeries"]
+):
+    def _iter_columns(self, df: PandasLikeDataFrame) -> Iterator[PandasLikeSeries]:
+        from narwhals._pandas_like.series import PandasLikeSeries
 
-    def matches(self: Self, pattern: str) -> PandasSelector:
-        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
-            return [df[col] for col in df.columns if re.search(pattern, col)]
-
-        def evaluate_output_names(df: PandasLikeDataFrame) -> Sequence[str]:
-            return [col for col in df.columns if re.search(pattern, col)]
-
-        return selector(self, func, evaluate_output_names)
-
-    def numeric(self: Self) -> PandasSelector:
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype(
-            {
-                dtypes.Int128,
-                dtypes.Int64,
-                dtypes.Int32,
-                dtypes.Int16,
-                dtypes.Int8,
-                dtypes.UInt128,
-                dtypes.UInt64,
-                dtypes.UInt32,
-                dtypes.UInt16,
-                dtypes.UInt8,
-                dtypes.Float64,
-                dtypes.Float32,
-            }
+        series = partial(
+            PandasLikeSeries,
+            implementation=df._implementation,
+            backend_version=df._backend_version,
+            version=df._version,
         )
-
-    def categorical(self: Self) -> PandasSelector:
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype({dtypes.Categorical})
-
-    def string(self: Self) -> PandasSelector:
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype({dtypes.String})
-
-    def boolean(self: Self) -> PandasSelector:
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype({dtypes.Boolean})
-
-    def all(self: Self) -> PandasSelector:
-        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
-            return [df[col] for col in df.columns]
-
-        return selector(self, func, get_column_names)
-
-    def datetime(
-        self: Self,
-        time_unit: TimeUnit | Iterable[TimeUnit] | None,
-        time_zone: str | timezone | Iterable[str | timezone | None] | None,
-    ) -> PandasSelector:
-        dtypes = import_dtypes_module(version=self._version)
-        time_units, time_zones = _parse_time_unit_and_time_zone(
-            time_unit=time_unit, time_zone=time_zone
+        # NOTE: (PERF102) is a false-positive
+        # .items() -> (str, pd.Series)
+        # .values() -> np.ndarray
+        for _col, ser in df._native_frame.items():  # noqa: PERF102
+            yield series(ser)
+
+    def _selector(
+        self,
+        context: _FullContext,
+        call: EvalSeries[PandasLikeDataFrame, PandasLikeSeries],
+        evaluate_output_names: EvalNames[PandasLikeDataFrame],
+        /,
+    ) -> CompliantSelector[PandasLikeDataFrame, PandasLikeSeries]:
+        return PandasSelector(
+            call,
+            depth=0,
+            function_name="selector",
+            evaluate_output_names=evaluate_output_names,
+            alias_output_names=None,
+            implementation=context._implementation,
+            backend_version=context._backend_version,
+            version=context._version,
+            kwargs={},
         )
 
-        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
-            return [
-                df[col]
-                for col in df.columns
-                if dtype_matches_time_unit_and_time_zone(
-                    dtype=df.schema[col],
-                    dtypes=dtypes,
-                    time_units=time_units,
-                    time_zones=time_zones,
-                )
-            ]
-
-        def evaluate_output_names(df: PandasLikeDataFrame) -> Sequence[str]:
-            return [
-                col
-                for col in df.columns
-                if dtype_matches_time_unit_and_time_zone(
-                    dtype=df.schema[col],
-                    dtypes=dtypes,
-                    time_units=time_units,
-                    time_zones=time_zones,
-                )
-            ]
 
-        return selector(self, func, evaluate_output_names)
+class PandasSelector(  # type: ignore[misc]
+    CompliantSelector["PandasLikeDataFrame", "PandasLikeSeries"], PandasLikeExpr
+):
+    # TODO @dangotbanned: Remove after merging (https://github.com/narwhals-dev/narwhals/pull/2060)
+    def __init__(self: Self, *args: Any, version: Version, **kwds: Any) -> None:
+        super().__init__(*args, version=version, **kwds)
+        self._version = version
 
+    @property
+    def selectors(self) -> PandasSelectorNamespace:
+        return PandasSelectorNamespace(self)
 
-class PandasSelector(PandasLikeExpr):
     def __repr__(self) -> str:  # pragma: no cover
         return (
             f"PandasSelector(depth={self._depth}, function_name={self._function_name}, "
@@ -141,83 +89,3 @@ def _to_expr(self: Self) -> PandasLikeExpr:
             version=self._version,
             kwargs=self._kwargs,
         )
-
-    def __sub__(self: Self, other: PandasSelector | Any) -> PandasSelector | Any:
-        if isinstance(other, PandasSelector):
-
-            def call(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                lhs = self._call(df)
-                return [x for x, name in zip(lhs, lhs_names) if name not in rhs_names]
-
-            def evaluate_output_names(df: PandasLikeDataFrame) -> list[str]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                return [x for x in lhs_names if x not in rhs_names]
-
-            return selector(self, call, evaluate_output_names)
-        else:
-            return self._to_expr() - other
-
-    def __or__(self: Self, other: PandasSelector | Any) -> PandasSelector | Any:
-        if isinstance(other, PandasSelector):
-
-            def call(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                lhs = self._call(df)
-                rhs = other._call(df)
-                return [
-                    *(x for x, name in zip(lhs, lhs_names) if name not in rhs_names),
-                    *rhs,
-                ]
-
-            def evaluate_output_names(df: PandasLikeDataFrame) -> list[str]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                return [*(x for x in lhs_names if x not in rhs_names), *rhs_names]
-
-            return selector(self, call, evaluate_output_names)
-        else:
-            return self._to_expr() | other
-
-    def __and__(self: Self, other: PandasSelector | Any) -> PandasSelector | Any:
-        if isinstance(other, PandasSelector):
-
-            def call(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                lhs = self._call(df)
-                return [x for x, name in zip(lhs, lhs_names) if name in rhs_names]
-
-            def evaluate_output_names(df: PandasLikeDataFrame) -> list[str]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                return [x for x in lhs_names if x in rhs_names]
-
-            return selector(self, call, evaluate_output_names)
-        else:
-            return self._to_expr() & other
-
-    def __invert__(self: Self) -> PandasSelector:
-        return PandasSelectorNamespace(self).all() - self
-
-
-def selector(
-    context: _FullContext,
-    call: Callable[[PandasLikeDataFrame], Sequence[PandasLikeSeries]],
-    evaluate_output_names: Callable[[PandasLikeDataFrame], Sequence[str]],
-    /,
-) -> PandasSelector:
-    return PandasSelector(
-        call,
-        depth=0,
-        function_name="selector",
-        evaluate_output_names=evaluate_output_names,
-        alias_output_names=None,
-        implementation=context._implementation,
-        backend_version=context._backend_version,
-        version=context._version,
-        kwargs={},
-    )

From 706732a5e129ec9b9884c5795bc31b51f7475079 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 17:04:18 +0000
Subject: [PATCH 08/55] feat: reimplement `_arrow.selectors`

That went smoothly: no changes to `narwhals/_selectors.py` were needed.
---
 narwhals/_arrow/dataframe.py |   2 +-
 narwhals/_arrow/selectors.py | 228 +++++++----------------------------
 2 files changed, 42 insertions(+), 188 deletions(-)

diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py
index 30c3f511ad..1d7e3d1e53 100644
--- a/narwhals/_arrow/dataframe.py
+++ b/narwhals/_arrow/dataframe.py
@@ -71,7 +71,7 @@
 from narwhals.typing import CompliantLazyFrame
 
 
-class ArrowDataFrame(CompliantDataFrame, CompliantLazyFrame):
+class ArrowDataFrame(CompliantDataFrame["ArrowSeries"], CompliantLazyFrame):
     # --- not in the spec ---
     def __init__(
         self: Self,
diff --git a/narwhals/_arrow/selectors.py b/narwhals/_arrow/selectors.py
index ef2a760364..663945a0f3 100644
--- a/narwhals/_arrow/selectors.py
+++ b/narwhals/_arrow/selectors.py
@@ -1,128 +1,61 @@
 from __future__ import annotations
 
-import re
 from typing import TYPE_CHECKING
 from typing import Any
-from typing import Callable
-from typing import Iterable
-from typing import Sequence
+from typing import Iterator
 
 from narwhals._arrow.expr import ArrowExpr
-from narwhals.utils import _parse_time_unit_and_time_zone
-from narwhals.utils import dtype_matches_time_unit_and_time_zone
-from narwhals.utils import get_column_names
-from narwhals.utils import import_dtypes_module
+from narwhals._selectors import CompliantSelector
+from narwhals._selectors import CompliantSelectorNamespace
 
 if TYPE_CHECKING:
-    from datetime import timezone
-
     from typing_extensions import Self
 
     from narwhals._arrow.dataframe import ArrowDataFrame
     from narwhals._arrow.series import ArrowSeries
-    from narwhals.dtypes import DType
-    from narwhals.typing import TimeUnit
-    from narwhals.utils import _LimitedContext
-
-
-class ArrowSelectorNamespace:
-    def __init__(self: Self, context: _LimitedContext, /) -> None:
-        self._backend_version = context._backend_version
-        self._version = context._version
-
-    def by_dtype(self: Self, dtypes: Iterable[DType | type[DType]]) -> ArrowSelector:
-        def func(df: ArrowDataFrame) -> list[ArrowSeries]:
-            return [df[col] for col in df.columns if df.schema[col] in dtypes]
-
-        def evaluate_output_names(df: ArrowDataFrame) -> Sequence[str]:
-            return [col for col in df.columns if df.schema[col] in dtypes]
-
-        return selector(self, func, evaluate_output_names)
-
-    def matches(self: Self, pattern: str) -> ArrowSelector:
-        def func(df: ArrowDataFrame) -> list[ArrowSeries]:
-            return [df[col] for col in df.columns if re.search(pattern, col)]
-
-        def evaluate_output_names(df: ArrowDataFrame) -> Sequence[str]:
-            return [col for col in df.columns if re.search(pattern, col)]
-
-        return selector(self, func, evaluate_output_names)
-
-    def numeric(self: Self) -> ArrowSelector:
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype(
-            [
-                dtypes.Int128,
-                dtypes.Int64,
-                dtypes.Int32,
-                dtypes.Int16,
-                dtypes.Int8,
-                dtypes.UInt128,
-                dtypes.UInt64,
-                dtypes.UInt32,
-                dtypes.UInt16,
-                dtypes.UInt8,
-                dtypes.Float64,
-                dtypes.Float32,
-            ],
-        )
-
-    def categorical(self: Self) -> ArrowSelector:
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype([dtypes.Categorical])
-
-    def string(self: Self) -> ArrowSelector:
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype([dtypes.String])
-
-    def boolean(self: Self) -> ArrowSelector:
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype([dtypes.Boolean])
-
-    def all(self: Self) -> ArrowSelector:
-        def func(df: ArrowDataFrame) -> list[ArrowSeries]:
-            return [df[col] for col in df.columns]
-
-        return selector(self, func, get_column_names)
-
-    def datetime(
-        self: Self,
-        time_unit: TimeUnit | Iterable[TimeUnit] | None,
-        time_zone: str | timezone | Iterable[str | timezone | None] | None,
-    ) -> ArrowSelector:
-        dtypes = import_dtypes_module(version=self._version)
-        time_units, time_zones = _parse_time_unit_and_time_zone(
-            time_unit=time_unit, time_zone=time_zone
+    from narwhals._selectors import EvalNames
+    from narwhals._selectors import EvalSeries
+    from narwhals.utils import Version
+    from narwhals.utils import _FullContext
+
+
+class ArrowSelectorNamespace(CompliantSelectorNamespace["ArrowDataFrame", "ArrowSeries"]):
+    def _iter_columns(self, df: ArrowDataFrame) -> Iterator[ArrowSeries]:
+        from narwhals._arrow.series import ArrowSeries
+
+        for col, ser in zip(df.columns, df._native_frame.itercolumns()):
+            yield ArrowSeries(
+                ser, name=col, backend_version=df._backend_version, version=df._version
+            )
+
+    def _selector(
+        self,
+        context: _FullContext,
+        call: EvalSeries[ArrowDataFrame, ArrowSeries],
+        evaluate_output_names: EvalNames[ArrowDataFrame],
+        /,
+    ) -> CompliantSelector[ArrowDataFrame, ArrowSeries]:
+        return ArrowSelector(
+            call,
+            depth=0,
+            function_name="selector",
+            evaluate_output_names=evaluate_output_names,
+            alias_output_names=None,
+            backend_version=context._backend_version,
+            version=context._version,
         )
 
-        def func(df: ArrowDataFrame) -> list[ArrowSeries]:
-            return [
-                df[col]
-                for col in df.columns
-                if dtype_matches_time_unit_and_time_zone(
-                    dtype=df.schema[col],
-                    dtypes=dtypes,
-                    time_units=time_units,
-                    time_zones=time_zones,
-                )
-            ]
 
-        def evaluate_output_names(df: ArrowDataFrame) -> Sequence[str]:
-            return [
-                col
-                for col in df.columns
-                if dtype_matches_time_unit_and_time_zone(
-                    dtype=df.schema[col],
-                    dtypes=dtypes,
-                    time_units=time_units,
-                    time_zones=time_zones,
-                )
-            ]
+class ArrowSelector(CompliantSelector["ArrowDataFrame", "ArrowSeries"], ArrowExpr):  # type: ignore[misc]
+    # TODO @dangotbanned: Remove after merging (https://github.com/narwhals-dev/narwhals/pull/2060)
+    def __init__(self: Self, *args: Any, version: Version, **kwds: Any) -> None:
+        super().__init__(*args, version=version, **kwds)
+        self._version = version
 
-        return selector(self, func, evaluate_output_names)
+    @property
+    def selectors(self) -> ArrowSelectorNamespace:
+        return ArrowSelectorNamespace(self)
 
-
-class ArrowSelector(ArrowExpr):
     def __repr__(self: Self) -> str:  # pragma: no cover
         return f"ArrowSelector(depth={self._depth}, function_name={self._function_name})"
 
@@ -136,82 +69,3 @@ def _to_expr(self: Self) -> ArrowExpr:
             backend_version=self._backend_version,
             version=self._version,
         )
-
-    def __sub__(self: Self, other: Self | Any) -> ArrowSelector | Any:
-        if isinstance(other, ArrowSelector):
-
-            def call(df: ArrowDataFrame) -> list[ArrowSeries]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                lhs = self._call(df)
-                return [x for x, name in zip(lhs, lhs_names) if name not in rhs_names]
-
-            def evaluate_output_names(df: ArrowDataFrame) -> list[str]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                return [x for x in lhs_names if x not in rhs_names]
-
-            return selector(self, call, evaluate_output_names)
-        else:
-            return self._to_expr() - other
-
-    def __or__(self: Self, other: Self | Any) -> ArrowSelector | Any:
-        if isinstance(other, ArrowSelector):
-
-            def call(df: ArrowDataFrame) -> list[ArrowSeries]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                lhs = self._call(df)
-                rhs = other._call(df)
-                return [
-                    *(x for x, name in zip(lhs, lhs_names) if name not in rhs_names),
-                    *rhs,
-                ]
-
-            def evaluate_output_names(df: ArrowDataFrame) -> list[str]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                return [*(x for x in lhs_names if x not in rhs_names), *rhs_names]
-
-            return selector(self, call, evaluate_output_names)
-        else:
-            return self._to_expr() | other
-
-    def __and__(self: Self, other: Self | Any) -> ArrowSelector | Any:
-        if isinstance(other, ArrowSelector):
-
-            def call(df: ArrowDataFrame) -> list[ArrowSeries]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                lhs = self._call(df)
-                return [x for x, name in zip(lhs, lhs_names) if name in rhs_names]
-
-            def evaluate_output_names(df: ArrowDataFrame) -> list[str]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                return [x for x in lhs_names if x in rhs_names]
-
-            return selector(self, call, evaluate_output_names)
-
-        else:
-            return self._to_expr() & other
-
-    def __invert__(self: Self) -> ArrowSelector:
-        return ArrowSelectorNamespace(self).all() - self
-
-
-def selector(
-    context: _LimitedContext,
-    call: Callable[[ArrowDataFrame], Sequence[ArrowSeries]],
-    evaluate_output_names: Callable[[ArrowDataFrame], Sequence[str]],
-    /,
-) -> ArrowSelector:
-    return ArrowSelector(
-        call,
-        depth=0,
-        function_name="selector",
-        evaluate_output_names=evaluate_output_names,
-        alias_output_names=None,
-        backend_version=context._backend_version,
-        version=context._version,
-    )

From 8bf4a491eb2ea8355ae6a0f8ade6067412392ef6 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 17:07:51 +0000
Subject: [PATCH 09/55] refactor: utilize updated `CompliantExpr`

Made possible by #2060: drops the temporary `_version` workarounds that were marked "Remove after merging".
---
 narwhals/_arrow/selectors.py       | 7 -------
 narwhals/_pandas_like/selectors.py | 7 -------
 narwhals/_selectors.py             | 4 ----
 3 files changed, 18 deletions(-)

diff --git a/narwhals/_arrow/selectors.py b/narwhals/_arrow/selectors.py
index 663945a0f3..896db5bf21 100644
--- a/narwhals/_arrow/selectors.py
+++ b/narwhals/_arrow/selectors.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 from typing import TYPE_CHECKING
-from typing import Any
 from typing import Iterator
 
 from narwhals._arrow.expr import ArrowExpr
@@ -15,7 +14,6 @@
     from narwhals._arrow.series import ArrowSeries
     from narwhals._selectors import EvalNames
     from narwhals._selectors import EvalSeries
-    from narwhals.utils import Version
     from narwhals.utils import _FullContext
 
 
@@ -47,11 +45,6 @@ def _selector(
 
 
 class ArrowSelector(CompliantSelector["ArrowDataFrame", "ArrowSeries"], ArrowExpr):  # type: ignore[misc]
-    # TODO @dangotbanned: Remove after merging (https://github.com/narwhals-dev/narwhals/pull/2060)
-    def __init__(self: Self, *args: Any, version: Version, **kwds: Any) -> None:
-        super().__init__(*args, version=version, **kwds)
-        self._version = version
-
     @property
     def selectors(self) -> ArrowSelectorNamespace:
         return ArrowSelectorNamespace(self)
diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py
index 6404a6f4b1..2a71f9a1f3 100644
--- a/narwhals/_pandas_like/selectors.py
+++ b/narwhals/_pandas_like/selectors.py
@@ -2,7 +2,6 @@
 
 from functools import partial
 from typing import TYPE_CHECKING
-from typing import Any
 from typing import Iterator
 
 from narwhals._pandas_like.dataframe import PandasLikeDataFrame
@@ -18,7 +17,6 @@
     from narwhals._pandas_like.series import PandasLikeSeries
     from narwhals._selectors import EvalNames
     from narwhals._selectors import EvalSeries
-    from narwhals.utils import Version
     from narwhals.utils import _FullContext
 
 
@@ -63,11 +61,6 @@ def _selector(
 class PandasSelector(  # type: ignore[misc]
     CompliantSelector["PandasLikeDataFrame", "PandasLikeSeries"], PandasLikeExpr
 ):
-    # TODO @dangotbanned: Remove after merging (https://github.com/narwhals-dev/narwhals/pull/2060)
-    def __init__(self: Self, *args: Any, version: Version, **kwds: Any) -> None:
-        super().__init__(*args, version=version, **kwds)
-        self._version = version
-
     @property
     def selectors(self) -> PandasSelectorNamespace:
         return PandasSelectorNamespace(self)
diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index 89a77f9148..707715fac2 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -159,11 +159,7 @@ def __init__(self: Self, context: _FullContext, /) -> None:
         self._version = context._version
 
 
-# NOTE: CompliantExpr already provides `_implementation`, `_backend_version`
-# https://github.com/narwhals-dev/narwhals/pull/2060
 class CompliantSelector(CompliantExpr[SeriesT], Generic[DataFrameT, SeriesT], Protocol):
-    _version: Version
-
     @property
     def selectors(self) -> CompliantSelectorNamespace[DataFrameT, SeriesT]: ...
     def __repr__(self: Self) -> str: ...

From 1e2e151e2a2a43ba0c24847108fe4213fdeb3943 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 17:19:42 +0000
Subject: [PATCH 10/55] fix(typing): update `_dask` collect as well

https://github.com/narwhals-dev/narwhals/actions/runs/13461786363/job/37618653099?pr=2064
---
 narwhals/_dask/dataframe.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py
index 935624c4fa..910c7781e5 100644
--- a/narwhals/_dask/dataframe.py
+++ b/narwhals/_dask/dataframe.py
@@ -30,6 +30,7 @@
     from narwhals._dask.expr import DaskExpr
     from narwhals._dask.group_by import DaskLazyGroupBy
     from narwhals._dask.namespace import DaskNamespace
+    from narwhals._polars.dataframe import PolarsDataFrame
     from narwhals.dtypes import DType
     from narwhals.utils import Version
 
@@ -94,7 +95,7 @@ def collect(
         self: Self,
         backend: Implementation | None,
         **kwargs: Any,
-    ) -> CompliantDataFrame:
+    ) -> CompliantDataFrame[Any] | PolarsDataFrame:
         import pandas as pd
 
         result = self._native_frame.compute(**kwargs)

From 5a36c81b4e336c8b1ce115effbad88803e782bfa Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 18:16:26 +0000
Subject: [PATCH 11/55] fix: maybe resolve `<3.11` protocol bug

https://github.com/narwhals-dev/narwhals/pull/2064#discussion_r1965921386
---
 narwhals/_arrow/selectors.py       | 5 +++++
 narwhals/_pandas_like/selectors.py | 5 +++++
 narwhals/_selectors.py             | 3 +++
 3 files changed, 13 insertions(+)

diff --git a/narwhals/_arrow/selectors.py b/narwhals/_arrow/selectors.py
index 896db5bf21..408e753d55 100644
--- a/narwhals/_arrow/selectors.py
+++ b/narwhals/_arrow/selectors.py
@@ -43,6 +43,11 @@ def _selector(
             version=context._version,
         )
 
+    def __init__(self: Self, context: _FullContext, /) -> None:
+        self._implementation = context._implementation
+        self._backend_version = context._backend_version
+        self._version = context._version
+
 
 class ArrowSelector(CompliantSelector["ArrowDataFrame", "ArrowSeries"], ArrowExpr):  # type: ignore[misc]
     @property
diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py
index 2a71f9a1f3..4dcace8386 100644
--- a/narwhals/_pandas_like/selectors.py
+++ b/narwhals/_pandas_like/selectors.py
@@ -57,6 +57,11 @@ def _selector(
             kwargs={},
         )
 
+    def __init__(self: Self, context: _FullContext, /) -> None:
+        self._implementation = context._implementation
+        self._backend_version = context._backend_version
+        self._version = context._version
+
 
 class PandasSelector(  # type: ignore[misc]
     CompliantSelector["PandasLikeDataFrame", "PandasLikeSeries"], PandasLikeExpr
diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index 707715fac2..5d3d92b21c 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -153,6 +153,9 @@ def names(df: DataFrameT) -> Sequence[str]:
 
         return self._selector(self, series, names)
 
+    # NOTE: Can't reuse for `<3.11`
+    # - https://github.com/python/cpython/issues/88970
+    # - https://github.com/python/cpython/pull/31628
     def __init__(self: Self, context: _FullContext, /) -> None:
         self._implementation = context._implementation
         self._backend_version = context._backend_version

From 5e453500ea05eaefb316fd0904b01c856e4e15ef Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 18:25:37 +0000
Subject: [PATCH 12/55] ignore coverage on init

https://github.com/narwhals-dev/narwhals/actions/runs/13462760425/job/37621753052?pr=2064
---
 narwhals/_selectors.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index 5d3d92b21c..6e4ed6d67d 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -156,7 +156,7 @@ def names(df: DataFrameT) -> Sequence[str]:
     # NOTE: Can't reuse for `<3.11`
     # - https://github.com/python/cpython/issues/88970
     # - https://github.com/python/cpython/pull/31628
-    def __init__(self: Self, context: _FullContext, /) -> None:
+    def __init__(self: Self, context: _FullContext, /) -> None:  # pragma: no cover
         self._implementation = context._implementation
         self._backend_version = context._backend_version
         self._version = context._version

From 42c2f4171c71db4781910945492a79d939066912 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 19:06:59 +0000
Subject: [PATCH 13/55] refactor: Also drop `kwargs` in `_to_expr`

`kwargs` is now always empty (#2059)
---
 narwhals/_pandas_like/selectors.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py
index 316365d01f..d764b80751 100644
--- a/narwhals/_pandas_like/selectors.py
+++ b/narwhals/_pandas_like/selectors.py
@@ -84,5 +84,4 @@ def _to_expr(self: Self) -> PandasLikeExpr:
             implementation=self._implementation,
             backend_version=self._backend_version,
             version=self._version,
-            kwargs=self._kwargs,
         )

From 4e89a2e8f03e3ccc72e45ea8d9c207b86744f312 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 22:20:40 +0000
Subject: [PATCH 14/55] add `CompliantLazyFrame.columns`

---
 narwhals/typing.py | 2 ++
 narwhals/utils.py  | 9 +++------
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/narwhals/typing.py b/narwhals/typing.py
index 22095eb0a7..38b66db09a 100644
--- a/narwhals/typing.py
+++ b/narwhals/typing.py
@@ -80,6 +80,8 @@ def aggregate(self, *exprs: Any) -> Self:
         ...  # `select` where all args are aggregations or literals
         # (so, no broadcasting is necessary).
 
+    @property
+    def columns(self) -> Sequence[str]: ...
 
 class CompliantExpr(Protocol, Generic[CompliantSeriesT_co]):
     _implementation: Implementation
diff --git a/narwhals/utils.py b/narwhals/utils.py
index 141423d200..bddc9ad1cd 100644
--- a/narwhals/utils.py
+++ b/narwhals/utils.py
@@ -10,7 +10,6 @@
 from typing import TYPE_CHECKING
 from typing import Any
 from typing import Iterable
-from typing import Iterator
 from typing import Sequence
 from typing import TypeVar
 from typing import Union
@@ -1305,14 +1304,12 @@ def dtype_matches_time_unit_and_time_zone(
     )
 
 
-def get_column_names(df: NativeFrame | CompliantDataFrame) -> Sequence[str]:
+def get_column_names(
+    df: NativeFrame | CompliantDataFrame | CompliantLazyFrame,
+) -> Sequence[str]:
     return df.columns
 
 
-def iter_columns(df: NativeFrame) -> Iterator[str]:  # pragma: no cover
-    yield from df.columns
-
-
 def _hasattr_static(obj: Any, attr: str) -> bool:
     sentinel = object()
     return getattr_static(obj, attr, sentinel) is not sentinel

From 9210a17cfc213a6d60849baea6bda2377af6d770 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 22:21:53 +0000
Subject: [PATCH 15/55] chore(typing): add `Compliant(Lazy|Data)Frame.schema`

---
 narwhals/typing.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/narwhals/typing.py b/narwhals/typing.py
index 38b66db09a..3b7075cc06 100644
--- a/narwhals/typing.py
+++ b/narwhals/typing.py
@@ -12,6 +12,7 @@
 
 if TYPE_CHECKING:
     from types import ModuleType
+    from typing import Mapping
 
     import numpy as np
     from typing_extensions import Self
@@ -67,6 +68,8 @@ def aggregate(self, *exprs: Any) -> Self:
 
     @property
     def columns(self) -> Sequence[str]: ...
+    @property
+    def schema(self) -> Mapping[str, DType]: ...
     def get_column(self, name: str) -> CompliantSeriesT_co: ...
 
 
@@ -82,6 +85,9 @@ def aggregate(self, *exprs: Any) -> Self:
 
     @property
     def columns(self) -> Sequence[str]: ...
+    @property
+    def schema(self) -> Mapping[str, DType]: ...
+
 
 class CompliantExpr(Protocol, Generic[CompliantSeriesT_co]):
     _implementation: Implementation

From 5382778ac66ce6636e669e94a686a366dd977413 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 22:24:23 +0000
Subject: [PATCH 16/55] feat: add some default iteration methods

Lazy-support needs to be able to override them
---
 narwhals/_selectors.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index 6e4ed6d67d..7900dd8dc6 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -64,6 +64,16 @@ class CompliantSelectorNamespace(Generic[DataFrameT, SeriesT], Protocol):
     # Only need internally, but it plugs so many holes that it must be useful beyond that
     # https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.iter_columns.html
     def _iter_columns(self, df: DataFrameT, /) -> Iterator[SeriesT]: ...
+    def _iter_schema(self, df: DataFrameT, /) -> Iterator[tuple[str, DType]]:
+        for ser in self._iter_columns(df):
+            yield ser.name, ser.dtype
+
+    def _iter_columns_dtypes(self, df: DataFrameT, /) -> Iterator[tuple[SeriesT, DType]]:
+        for ser in self._iter_columns(df):
+            yield ser, ser.dtype
+
+    def _iter_columns_names(self, df: DataFrameT, /) -> Iterator[tuple[SeriesT, str]]:
+        yield from zip(self._iter_columns(df), df.columns)
 
     def _selector(
         self,
@@ -73,6 +83,9 @@ def _selector(
         /,
     ) -> CompliantSelector[DataFrameT, SeriesT]: ...
 
+    # NOTE: `.dtype` won't return a `nw.DType` (or maybe anything) for lazy backends
+    # - Their `SeriesT` is a native object
+    # - See (https://github.com/narwhals-dev/narwhals/issues/2044)
     def _is_dtype(
         self: CompliantSelectorNamespace[DataFrameT, SeriesT], dtype: type[DType], /
     ) -> CompliantSelector[DataFrameT, SeriesT]:

From 91c86a08d11b8bd5a2e80aebdcc0ddb3cc44fb3d Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 22:25:49 +0000
Subject: [PATCH 17/55] feat: build out lazy-support

---
 narwhals/_selectors.py | 44 +++++++++++++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 13 deletions(-)

diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index 7900dd8dc6..a6532d1072 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -23,6 +23,7 @@
 from narwhals.utils import dtype_matches_time_unit_and_time_zone
 from narwhals.utils import get_column_names
 from narwhals.utils import import_dtypes_module
+from narwhals.utils import is_compliant_dataframe
 
 if TYPE_CHECKING:
     from datetime import timezone
@@ -33,6 +34,7 @@
 
     from narwhals.dtypes import DType
     from narwhals.typing import CompliantDataFrame
+    from narwhals.typing import CompliantLazyFrame
     from narwhals.typing import CompliantSeries
     from narwhals.typing import TimeUnit
     from narwhals.utils import Implementation
@@ -46,7 +48,7 @@ def dtype(self) -> DType: ...
 
 
 SeriesT = TypeVar("SeriesT", bound="CompliantSeriesWithDType")
-DataFrameT = TypeVar("DataFrameT", bound="CompliantDataFrame")
+DataFrameT = TypeVar("DataFrameT", bound="CompliantDataFrame | CompliantLazyFrame")
 SelectorOrExpr: TypeAlias = (
     "CompliantSelector[DataFrameT, SeriesT] | CompliantExpr[SeriesT]"
 )
@@ -90,23 +92,23 @@ def _is_dtype(
         self: CompliantSelectorNamespace[DataFrameT, SeriesT], dtype: type[DType], /
     ) -> CompliantSelector[DataFrameT, SeriesT]:
         def series(df: DataFrameT) -> Sequence[SeriesT]:
-            return [ser for ser in self._iter_columns(df) if isinstance(ser.dtype, dtype)]
-
-        def names(df: DataFrameT) -> Sequence[str]:
             return [
-                ser.name for ser in self._iter_columns(df) if isinstance(ser.dtype, dtype)
+                ser for ser, tp in self._iter_columns_dtypes(df) if isinstance(tp, dtype)
             ]
 
+        def names(df: DataFrameT) -> Sequence[str]:
+            return [name for name, tp in self._iter_schema(df) if isinstance(tp, dtype)]
+
         return self._selector(self, series, names)
 
     def by_dtype(
         self: Self, dtypes: Collection[DType | type[DType]]
     ) -> CompliantSelector[DataFrameT, SeriesT]:
         def series(df: DataFrameT) -> Sequence[SeriesT]:
-            return [ser for ser in self._iter_columns(df) if ser.dtype in dtypes]
+            return [ser for ser, tp in self._iter_columns_dtypes(df) if tp in dtypes]
 
         def names(df: DataFrameT) -> Sequence[str]:
-            return [ser.name for ser in self._iter_columns(df) if ser.dtype in dtypes]
+            return [name for name, tp in self._iter_schema(df) if tp in dtypes]
 
         return self._selector(self, series, names)
 
@@ -114,7 +116,11 @@ def matches(self: Self, pattern: str) -> CompliantSelector[DataFrameT, SeriesT]:
         p = re.compile(pattern)
 
         def series(df: DataFrameT) -> Sequence[SeriesT]:
-            return [df.get_column(col) for col in df.columns if p.search(col)]
+            # NOTE: Possibly cheaper than lazyframe?
+            if is_compliant_dataframe(df):
+                return [df.get_column(col) for col in df.columns if p.search(col)]
+
+            return [ser for ser, name in self._iter_columns_names(df) if p.search(name)]
 
         def names(df: DataFrameT) -> Sequence[str]:
             return [col for col in df.columns if p.search(col)]
@@ -123,10 +129,10 @@ def names(df: DataFrameT) -> Sequence[str]:
 
     def numeric(self: Self) -> CompliantSelector[DataFrameT, SeriesT]:
         def series(df: DataFrameT) -> Sequence[SeriesT]:
-            return [ser for ser in self._iter_columns(df) if ser.dtype.is_numeric()]
+            return [ser for ser, tp in self._iter_columns_dtypes(df) if tp.is_numeric()]
 
         def names(df: DataFrameT) -> Sequence[str]:
-            return [ser.name for ser in self._iter_columns(df) if ser.dtype.is_numeric()]
+            return [name for name, tp in self._iter_schema(df) if tp.is_numeric()]
 
         return self._selector(self, series, names)
 
@@ -159,10 +165,10 @@ def datetime(
         )
 
         def series(df: DataFrameT) -> Sequence[SeriesT]:
-            return [ser for ser in self._iter_columns(df) if matches(ser.dtype)]
+            return [ser for ser, tp in self._iter_columns_dtypes(df) if matches(tp)]
 
         def names(df: DataFrameT) -> Sequence[str]:
-            return [ser.name for ser in self._iter_columns(df) if matches(ser.dtype)]
+            return [name for name, tp in self._iter_schema(df) if matches(tp)]
 
         return self._selector(self, series, names)
 
@@ -175,6 +181,18 @@ def __init__(self: Self, context: _FullContext, /) -> None:  # pragma: no cover
         self._version = context._version
 
 
+class LazySelectorNamespace(
+    CompliantSelectorNamespace[DataFrameT, SeriesT],
+    Generic[DataFrameT, SeriesT],
+    Protocol,
+):
+    def _iter_schema(self, df: DataFrameT) -> Iterator[tuple[str, DType]]:
+        yield from df.schema.items()
+
+    def _iter_columns_dtypes(self, df: DataFrameT, /) -> Iterator[tuple[SeriesT, DType]]:
+        yield from zip(self._iter_columns(df), df.schema.values())
+
+
 class CompliantSelector(CompliantExpr[SeriesT], Generic[DataFrameT, SeriesT], Protocol):
     @property
     def selectors(self) -> CompliantSelectorNamespace[DataFrameT, SeriesT]: ...
@@ -264,6 +282,6 @@ def __invert__(
 # NOTE: Should probably be a `DataFrame` method
 # Using `Expr` because this doesn't require `Selector` attrs/methods
 def _eval_lhs_rhs(
-    df: CompliantDataFrame, lhs: CompliantExpr, rhs: CompliantExpr
+    df: CompliantDataFrame | CompliantLazyFrame, lhs: CompliantExpr, rhs: CompliantExpr
 ) -> tuple[Sequence[str], Sequence[str]]:
     return lhs._evaluate_output_names(df), rhs._evaluate_output_names(df)

From a8c9d1358f83eaa3b530cd44090d492fcac4aab8 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 22:27:49 +0000
Subject: [PATCH 18/55] feat: reimplement `_dask.selectors`

- Working locally
- `pyright` is very unhappy with `dx.Series` being used
---
 narwhals/_dask/namespace.py |   3 +
 narwhals/_dask/selectors.py | 224 ++++++------------------------------
 2 files changed, 38 insertions(+), 189 deletions(-)

diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py
index 5946496957..dc35657b4a 100644
--- a/narwhals/_dask/namespace.py
+++ b/narwhals/_dask/namespace.py
@@ -24,6 +24,7 @@
 from narwhals._expression_parsing import combine_alias_output_names
 from narwhals._expression_parsing import combine_evaluate_output_names
 from narwhals.typing import CompliantNamespace
+from narwhals.utils import Implementation
 from narwhals.utils import get_column_names
 from narwhals.utils import is_compliant_expr
 
@@ -40,6 +41,8 @@
 
 
 class DaskNamespace(CompliantNamespace["dx.Series"]):
+    _implementation: Implementation = Implementation.DASK
+
     @property
     def selectors(self: Self) -> DaskSelectorNamespace:
         return DaskSelectorNamespace(self)
diff --git a/narwhals/_dask/selectors.py b/narwhals/_dask/selectors.py
index 1ea275aca8..ee85158758 100644
--- a/narwhals/_dask/selectors.py
+++ b/narwhals/_dask/selectors.py
@@ -1,17 +1,12 @@
 from __future__ import annotations
 
-import re
 from typing import TYPE_CHECKING
-from typing import Any
-from typing import Callable
-from typing import Iterable
-from typing import Sequence
+from typing import Iterator
 
+from narwhals._dask.dataframe import DaskLazyFrame
 from narwhals._dask.expr import DaskExpr
-from narwhals.utils import _parse_time_unit_and_time_zone
-from narwhals.utils import dtype_matches_time_unit_and_time_zone
-from narwhals.utils import get_column_names
-from narwhals.utils import import_dtypes_module
+from narwhals._selectors import CompliantSelector
+from narwhals._selectors import LazySelectorNamespace
 
 if TYPE_CHECKING:
     try:
@@ -19,14 +14,12 @@
     except ModuleNotFoundError:
         import dask_expr as dx
 
-    from datetime import timezone
-
     from typing_extensions import Self
 
     from narwhals._dask.dataframe import DaskLazyFrame
-    from narwhals.dtypes import DType
-    from narwhals.typing import TimeUnit
-    from narwhals.utils import _LimitedContext
+    from narwhals._selectors import EvalNames
+    from narwhals._selectors import EvalSeries
+    from narwhals.utils import _FullContext
 
     try:
         import dask.dataframe.dask_expr as dx
@@ -34,108 +27,39 @@
         import dask_expr as dx
 
 
-class DaskSelectorNamespace:
-    def __init__(self: Self, context: _LimitedContext, /) -> None:
-        self._backend_version = context._backend_version
-        self._version = context._version
-
-    def by_dtype(self: Self, dtypes: Iterable[DType | type[DType]]) -> DaskSelector:
-        def func(df: DaskLazyFrame) -> list[dx.Series]:
-            return [
-                df._native_frame[col] for col in df.columns if df.schema[col] in dtypes
-            ]
-
-        def evaluate_output_names(df: DaskLazyFrame) -> Sequence[str]:
-            return [col for col in df.columns if df.schema[col] in dtypes]
-
-        return selector(self, func, evaluate_output_names)
-
-    def matches(self: Self, pattern: str) -> DaskSelector:
-        def func(df: DaskLazyFrame) -> list[dx.Series]:
-            return [
-                df._native_frame[col] for col in df.columns if re.search(pattern, col)
-            ]
-
-        def evaluate_output_names(df: DaskLazyFrame) -> Sequence[str]:
-            return [col for col in df.columns if re.search(pattern, col)]
-
-        return selector(self, func, evaluate_output_names)
-
-    def numeric(self: Self) -> DaskSelector:
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype(
-            {
-                dtypes.Int128,
-                dtypes.Int64,
-                dtypes.Int32,
-                dtypes.Int16,
-                dtypes.Int8,
-                dtypes.UInt128,
-                dtypes.UInt64,
-                dtypes.UInt32,
-                dtypes.UInt16,
-                dtypes.UInt8,
-                dtypes.Float64,
-                dtypes.Float32,
-            },
+class DaskSelectorNamespace(LazySelectorNamespace["DaskLazyFrame", "dx.Series"]):
+    def _iter_columns(self, df: DaskLazyFrame) -> Iterator[dx.Series]:
+        for _col, ser in df._native_frame.items():  # noqa: PERF102
+            yield ser
+
+    def _selector(
+        self,
+        context: _FullContext,
+        call: EvalSeries[DaskLazyFrame, dx.Series],
+        evaluate_output_names: EvalNames[DaskLazyFrame],
+        /,
+    ) -> CompliantSelector[DaskLazyFrame, dx.Series]:
+        return DaskSelector(
+            call,
+            depth=0,
+            function_name="selector",
+            evaluate_output_names=evaluate_output_names,
+            alias_output_names=None,
+            backend_version=context._backend_version,
+            version=context._version,
         )
 
-    def categorical(self: Self) -> DaskSelector:
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype({dtypes.Categorical})
-
-    def string(self: Self) -> DaskSelector:
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype({dtypes.String})
-
-    def boolean(self: Self) -> DaskSelector:
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype({dtypes.Boolean})
-
-    def all(self: Self) -> DaskSelector:
-        def func(df: DaskLazyFrame) -> list[dx.Series]:
-            return [df._native_frame[col] for col in df.columns]
-
-        return selector(self, func, get_column_names)
-
-    def datetime(
-        self: Self,
-        time_unit: TimeUnit | Iterable[TimeUnit] | None,
-        time_zone: str | timezone | Iterable[str | timezone | None] | None,
-    ) -> DaskSelector:  # pragma: no cover
-        dtypes = import_dtypes_module(version=self._version)
-        time_units, time_zones = _parse_time_unit_and_time_zone(
-            time_unit=time_unit, time_zone=time_zone
-        )
-
-        def func(df: DaskLazyFrame) -> list[dx.Series]:
-            return [
-                df._native_frame[col]
-                for col in df.columns
-                if dtype_matches_time_unit_and_time_zone(
-                    dtype=df.schema[col],
-                    dtypes=dtypes,
-                    time_units=time_units,
-                    time_zones=time_zones,
-                )
-            ]
-
-        def evaluate_output_names(df: DaskLazyFrame) -> Sequence[str]:
-            return [
-                col
-                for col in df.columns
-                if dtype_matches_time_unit_and_time_zone(
-                    dtype=df.schema[col],
-                    dtypes=dtypes,
-                    time_units=time_units,
-                    time_zones=time_zones,
-                )
-            ]
+    def __init__(self: Self, context: _FullContext, /) -> None:
+        self._implementation = context._implementation
+        self._backend_version = context._backend_version
+        self._version = context._version
 
-        return selector(self, func, evaluate_output_names)
 
+class DaskSelector(CompliantSelector["DaskLazyFrame", "dx.Series"], DaskExpr):  # type: ignore[misc]
+    @property
+    def selectors(self) -> DaskSelectorNamespace:
+        return DaskSelectorNamespace(self)
 
-class DaskSelector(DaskExpr):
     def __repr__(self: Self) -> str:  # pragma: no cover
         return f"DaskSelector(depth={self._depth}, function_name={self._function_name})"
 
@@ -149,81 +73,3 @@ def _to_expr(self: Self) -> DaskExpr:
             backend_version=self._backend_version,
             version=self._version,
         )
-
-    def __sub__(self: Self, other: DaskSelector | Any) -> DaskSelector | Any:
-        if isinstance(other, DaskSelector):
-
-            def call(df: DaskLazyFrame) -> list[dx.Series]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                lhs = self._call(df)
-                return [x for x, name in zip(lhs, lhs_names) if name not in rhs_names]
-
-            def evaluate_output_names(df: DaskLazyFrame) -> list[str]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                return [x for x in lhs_names if x not in rhs_names]
-
-            return selector(self, call, evaluate_output_names)
-        else:
-            return self._to_expr() - other
-
-    def __or__(self: Self, other: DaskSelector | Any) -> DaskSelector | Any:
-        if isinstance(other, DaskSelector):
-
-            def call(df: DaskLazyFrame) -> list[dx.Series]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                lhs = self._call(df)
-                rhs = other._call(df)
-                return [
-                    *(x for x, name in zip(lhs, lhs_names) if name not in rhs_names),
-                    *rhs,
-                ]
-
-            def evaluate_output_names(df: DaskLazyFrame) -> list[str]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                return [*(x for x in lhs_names if x not in rhs_names), *rhs_names]
-
-            return selector(self, call, evaluate_output_names)
-        else:
-            return self._to_expr() | other
-
-    def __and__(self: Self, other: DaskSelector | Any) -> DaskSelector | Any:
-        if isinstance(other, DaskSelector):
-
-            def call(df: DaskLazyFrame) -> list[dx.Series]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                lhs = self._call(df)
-                return [x for x, name in zip(lhs, lhs_names) if name in rhs_names]
-
-            def evaluate_output_names(df: DaskLazyFrame) -> list[str]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                return [x for x in lhs_names if x in rhs_names]
-
-            return selector(self, call, evaluate_output_names)
-        else:
-            return self._to_expr() & other
-
-    def __invert__(self: Self) -> DaskSelector:
-        return DaskSelectorNamespace(self).all() - self
-
-
-def selector(
-    context: _LimitedContext,
-    call: Callable[[DaskLazyFrame], Sequence[dx.Series]],
-    evaluate_output_names: Callable[[DaskLazyFrame], Sequence[str]],
-    /,
-) -> DaskSelector:
-    return DaskSelector(
-        call,
-        depth=0,
-        function_name="selector",
-        evaluate_output_names=evaluate_output_names,
-        alias_output_names=None,
-        backend_version=context._backend_version,
-        version=context._version,
-    )

From d6a2e4138ff56c77b30f2f23c5562ca4568f6d2d Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 22:32:10 +0000
Subject: [PATCH 19/55] refactor: rename `DataFrameT` -> `FrameT`

---
 narwhals/_selectors.py | 113 +++++++++++++++++++----------------------
 1 file changed, 53 insertions(+), 60 deletions(-)

diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index a6532d1072..37429bdc70 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -48,16 +48,14 @@ def dtype(self) -> DType: ...
 
 
 SeriesT = TypeVar("SeriesT", bound="CompliantSeriesWithDType")
-DataFrameT = TypeVar("DataFrameT", bound="CompliantDataFrame | CompliantLazyFrame")
-SelectorOrExpr: TypeAlias = (
-    "CompliantSelector[DataFrameT, SeriesT] | CompliantExpr[SeriesT]"
-)
-EvalSeries: TypeAlias = Callable[[DataFrameT], Sequence[SeriesT]]
-EvalNames: TypeAlias = Callable[[DataFrameT], Sequence[str]]
+FrameT = TypeVar("FrameT", bound="CompliantDataFrame | CompliantLazyFrame")
+SelectorOrExpr: TypeAlias = "CompliantSelector[FrameT, SeriesT] | CompliantExpr[SeriesT]"
+EvalSeries: TypeAlias = Callable[[FrameT], Sequence[SeriesT]]
+EvalNames: TypeAlias = Callable[[FrameT], Sequence[str]]
 
 
 # NOTE: Pretty much finished generic for eager backends
-class CompliantSelectorNamespace(Generic[DataFrameT, SeriesT], Protocol):
+class CompliantSelectorNamespace(Generic[FrameT, SeriesT], Protocol):
     _implementation: Implementation
     _backend_version: tuple[int, ...]
     _version: Version
@@ -65,88 +63,88 @@ class CompliantSelectorNamespace(Generic[DataFrameT, SeriesT], Protocol):
     # TODO @dangotbanned: push for adding to public API for `DataFrame`
     # Only need internally, but it plugs so many holes that it must be useful beyond that
     # https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.iter_columns.html
-    def _iter_columns(self, df: DataFrameT, /) -> Iterator[SeriesT]: ...
-    def _iter_schema(self, df: DataFrameT, /) -> Iterator[tuple[str, DType]]:
+    def _iter_columns(self, df: FrameT, /) -> Iterator[SeriesT]: ...
+    def _iter_schema(self, df: FrameT, /) -> Iterator[tuple[str, DType]]:
         for ser in self._iter_columns(df):
             yield ser.name, ser.dtype
 
-    def _iter_columns_dtypes(self, df: DataFrameT, /) -> Iterator[tuple[SeriesT, DType]]:
+    def _iter_columns_dtypes(self, df: FrameT, /) -> Iterator[tuple[SeriesT, DType]]:
         for ser in self._iter_columns(df):
             yield ser, ser.dtype
 
-    def _iter_columns_names(self, df: DataFrameT, /) -> Iterator[tuple[SeriesT, str]]:
+    def _iter_columns_names(self, df: FrameT, /) -> Iterator[tuple[SeriesT, str]]:
         yield from zip(self._iter_columns(df), df.columns)
 
     def _selector(
         self,
         context: _FullContext,
-        call: EvalSeries[DataFrameT, SeriesT],
-        evaluate_output_names: EvalNames[DataFrameT],
+        call: EvalSeries[FrameT, SeriesT],
+        evaluate_output_names: EvalNames[FrameT],
         /,
-    ) -> CompliantSelector[DataFrameT, SeriesT]: ...
+    ) -> CompliantSelector[FrameT, SeriesT]: ...
 
     # NOTE: `.dtype` won't return a `nw.DType` (or maybe anything) for lazy backends
     # - Their `SeriesT` is a native object
     # - See (https://github.com/narwhals-dev/narwhals/issues/2044)
     def _is_dtype(
-        self: CompliantSelectorNamespace[DataFrameT, SeriesT], dtype: type[DType], /
-    ) -> CompliantSelector[DataFrameT, SeriesT]:
-        def series(df: DataFrameT) -> Sequence[SeriesT]:
+        self: CompliantSelectorNamespace[FrameT, SeriesT], dtype: type[DType], /
+    ) -> CompliantSelector[FrameT, SeriesT]:
+        def series(df: FrameT) -> Sequence[SeriesT]:
             return [
                 ser for ser, tp in self._iter_columns_dtypes(df) if isinstance(tp, dtype)
             ]
 
-        def names(df: DataFrameT) -> Sequence[str]:
+        def names(df: FrameT) -> Sequence[str]:
             return [name for name, tp in self._iter_schema(df) if isinstance(tp, dtype)]
 
         return self._selector(self, series, names)
 
     def by_dtype(
         self: Self, dtypes: Collection[DType | type[DType]]
-    ) -> CompliantSelector[DataFrameT, SeriesT]:
-        def series(df: DataFrameT) -> Sequence[SeriesT]:
+    ) -> CompliantSelector[FrameT, SeriesT]:
+        def series(df: FrameT) -> Sequence[SeriesT]:
             return [ser for ser, tp in self._iter_columns_dtypes(df) if tp in dtypes]
 
-        def names(df: DataFrameT) -> Sequence[str]:
+        def names(df: FrameT) -> Sequence[str]:
             return [name for name, tp in self._iter_schema(df) if tp in dtypes]
 
         return self._selector(self, series, names)
 
-    def matches(self: Self, pattern: str) -> CompliantSelector[DataFrameT, SeriesT]:
+    def matches(self: Self, pattern: str) -> CompliantSelector[FrameT, SeriesT]:
         p = re.compile(pattern)
 
-        def series(df: DataFrameT) -> Sequence[SeriesT]:
+        def series(df: FrameT) -> Sequence[SeriesT]:
             # NOTE: Possibly cheaper than lazyframe?
             if is_compliant_dataframe(df):
                 return [df.get_column(col) for col in df.columns if p.search(col)]
 
             return [ser for ser, name in self._iter_columns_names(df) if p.search(name)]
 
-        def names(df: DataFrameT) -> Sequence[str]:
+        def names(df: FrameT) -> Sequence[str]:
             return [col for col in df.columns if p.search(col)]
 
         return self._selector(self, series, names)
 
-    def numeric(self: Self) -> CompliantSelector[DataFrameT, SeriesT]:
-        def series(df: DataFrameT) -> Sequence[SeriesT]:
+    def numeric(self: Self) -> CompliantSelector[FrameT, SeriesT]:
+        def series(df: FrameT) -> Sequence[SeriesT]:
             return [ser for ser, tp in self._iter_columns_dtypes(df) if tp.is_numeric()]
 
-        def names(df: DataFrameT) -> Sequence[str]:
+        def names(df: FrameT) -> Sequence[str]:
             return [name for name, tp in self._iter_schema(df) if tp.is_numeric()]
 
         return self._selector(self, series, names)
 
-    def categorical(self: Self) -> CompliantSelector[DataFrameT, SeriesT]:
+    def categorical(self: Self) -> CompliantSelector[FrameT, SeriesT]:
         return self._is_dtype(import_dtypes_module(self._version).Categorical)
 
-    def string(self: Self) -> CompliantSelector[DataFrameT, SeriesT]:
+    def string(self: Self) -> CompliantSelector[FrameT, SeriesT]:
         return self._is_dtype(import_dtypes_module(self._version).String)
 
-    def boolean(self: Self) -> CompliantSelector[DataFrameT, SeriesT]:
+    def boolean(self: Self) -> CompliantSelector[FrameT, SeriesT]:
         return self._is_dtype(import_dtypes_module(self._version).Boolean)
 
-    def all(self: Self) -> CompliantSelector[DataFrameT, SeriesT]:
-        def series(df: DataFrameT) -> Sequence[SeriesT]:
+    def all(self: Self) -> CompliantSelector[FrameT, SeriesT]:
+        def series(df: FrameT) -> Sequence[SeriesT]:
             return list(self._iter_columns(df))
 
         return self._selector(self, series, get_column_names)
@@ -155,7 +153,7 @@ def datetime(
         self: Self,
         time_unit: TimeUnit | Iterable[TimeUnit] | None,
         time_zone: str | timezone | Iterable[str | timezone | None] | None,
-    ) -> CompliantSelector[DataFrameT, SeriesT]:
+    ) -> CompliantSelector[FrameT, SeriesT]:
         time_units, time_zones = _parse_time_unit_and_time_zone(time_unit, time_zone)
         matches = partial(
             dtype_matches_time_unit_and_time_zone,
@@ -164,10 +162,10 @@ def datetime(
             time_zones=time_zones,
         )
 
-        def series(df: DataFrameT) -> Sequence[SeriesT]:
+        def series(df: FrameT) -> Sequence[SeriesT]:
             return [ser for ser, tp in self._iter_columns_dtypes(df) if matches(tp)]
 
-        def names(df: DataFrameT) -> Sequence[str]:
+        def names(df: FrameT) -> Sequence[str]:
             return [name for name, tp in self._iter_schema(df) if matches(tp)]
 
         return self._selector(self, series, names)
@@ -182,27 +180,24 @@ def __init__(self: Self, context: _FullContext, /) -> None:  # pragma: no cover
 
 
 class LazySelectorNamespace(
-    CompliantSelectorNamespace[DataFrameT, SeriesT],
-    Generic[DataFrameT, SeriesT],
-    Protocol,
+    CompliantSelectorNamespace[FrameT, SeriesT], Generic[FrameT, SeriesT], Protocol
 ):
-    def _iter_schema(self, df: DataFrameT) -> Iterator[tuple[str, DType]]:
+    def _iter_schema(self, df: FrameT) -> Iterator[tuple[str, DType]]:
         yield from df.schema.items()
 
-    def _iter_columns_dtypes(self, df: DataFrameT, /) -> Iterator[tuple[SeriesT, DType]]:
+    def _iter_columns_dtypes(self, df: FrameT, /) -> Iterator[tuple[SeriesT, DType]]:
         yield from zip(self._iter_columns(df), df.schema.values())
 
 
-class CompliantSelector(CompliantExpr[SeriesT], Generic[DataFrameT, SeriesT], Protocol):
+class CompliantSelector(CompliantExpr[SeriesT], Generic[FrameT, SeriesT], Protocol):
     @property
-    def selectors(self) -> CompliantSelectorNamespace[DataFrameT, SeriesT]: ...
+    def selectors(self) -> CompliantSelectorNamespace[FrameT, SeriesT]: ...
     def __repr__(self: Self) -> str: ...
     def _to_expr(self: Self) -> CompliantExpr[SeriesT]: ...
 
     def _is_selector(
-        self: Self,
-        other: Self | CompliantExpr[SeriesT],
-    ) -> TypeIs[CompliantSelector[DataFrameT, SeriesT]]:
+        self: Self, other: Self | CompliantExpr[SeriesT]
+    ) -> TypeIs[CompliantSelector[FrameT, SeriesT]]:
         return isinstance(other, type(self))
 
     @overload
@@ -210,17 +205,17 @@ def __sub__(self: Self, other: Self) -> Self: ...
     @overload
     def __sub__(self: Self, other: CompliantExpr[SeriesT]) -> CompliantExpr[SeriesT]: ...
     def __sub__(
-        self: Self, other: SelectorOrExpr[DataFrameT, SeriesT]
-    ) -> SelectorOrExpr[DataFrameT, SeriesT]:
+        self: Self, other: SelectorOrExpr[FrameT, SeriesT]
+    ) -> SelectorOrExpr[FrameT, SeriesT]:
         if self._is_selector(other):
 
-            def series(df: DataFrameT) -> Sequence[SeriesT]:
+            def series(df: FrameT) -> Sequence[SeriesT]:
                 lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
                 return [
                     x for x, name in zip(self(df), lhs_names) if name not in rhs_names
                 ]
 
-            def names(df: DataFrameT) -> Sequence[str]:
+            def names(df: FrameT) -> Sequence[str]:
                 lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
                 return [x for x in lhs_names if x not in rhs_names]
 
@@ -233,18 +228,18 @@ def __or__(self: Self, other: Self) -> Self: ...
     @overload
     def __or__(self: Self, other: CompliantExpr[SeriesT]) -> CompliantExpr[SeriesT]: ...
     def __or__(
-        self: Self, other: SelectorOrExpr[DataFrameT, SeriesT]
-    ) -> SelectorOrExpr[DataFrameT, SeriesT]:
+        self: Self, other: SelectorOrExpr[FrameT, SeriesT]
+    ) -> SelectorOrExpr[FrameT, SeriesT]:
         if self._is_selector(other):
 
-            def names(df: DataFrameT) -> Sequence[SeriesT]:
+            def names(df: FrameT) -> Sequence[SeriesT]:
                 lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
                 return [
                     *(x for x, name in zip(self(df), lhs_names) if name not in rhs_names),
                     *other(df),
                 ]
 
-            def series(df: DataFrameT) -> Sequence[str]:
+            def series(df: FrameT) -> Sequence[str]:
                 lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
                 return [*(x for x in lhs_names if x not in rhs_names), *rhs_names]
 
@@ -257,15 +252,15 @@ def __and__(self: Self, other: Self) -> Self: ...
     @overload
     def __and__(self: Self, other: CompliantExpr[SeriesT]) -> CompliantExpr[SeriesT]: ...
     def __and__(
-        self: Self, other: SelectorOrExpr[DataFrameT, SeriesT]
-    ) -> SelectorOrExpr[DataFrameT, SeriesT]:
+        self: Self, other: SelectorOrExpr[FrameT, SeriesT]
+    ) -> SelectorOrExpr[FrameT, SeriesT]:
         if self._is_selector(other):
 
-            def series(df: DataFrameT) -> Sequence[SeriesT]:
+            def series(df: FrameT) -> Sequence[SeriesT]:
                 lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
                 return [x for x, name in zip(self(df), lhs_names) if name in rhs_names]
 
-            def names(df: DataFrameT) -> Sequence[str]:
+            def names(df: FrameT) -> Sequence[str]:
                 lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
                 return [x for x in lhs_names if x in rhs_names]
 
@@ -273,9 +268,7 @@ def names(df: DataFrameT) -> Sequence[str]:
         else:
             return self._to_expr() & other
 
-    def __invert__(
-        self: Self,
-    ) -> CompliantSelector[DataFrameT, SeriesT]:
+    def __invert__(self: Self) -> CompliantSelector[FrameT, SeriesT]:
         return self.selectors.all() - self
 
 

From f32c3487e8f5f78ae88efcf8204655e9140cf28e Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 23:33:15 +0000
Subject: [PATCH 20/55] feat: reimplement `_duckdb.selectors`

- Untested locally: I don't have a local `duckdb` install
- Expecting this to work the same as `_dask` once it runs in CI
---
 narwhals/_duckdb/namespace.py |   3 +
 narwhals/_duckdb/selectors.py | 228 ++++++----------------------------
 2 files changed, 42 insertions(+), 189 deletions(-)

diff --git a/narwhals/_duckdb/namespace.py b/narwhals/_duckdb/namespace.py
index 56b3bf4a4f..fa5a9ee7fd 100644
--- a/narwhals/_duckdb/namespace.py
+++ b/narwhals/_duckdb/namespace.py
@@ -24,6 +24,7 @@
 from narwhals._expression_parsing import combine_alias_output_names
 from narwhals._expression_parsing import combine_evaluate_output_names
 from narwhals.typing import CompliantNamespace
+from narwhals.utils import Implementation
 from narwhals.utils import get_column_names
 
 if TYPE_CHECKING:
@@ -36,6 +37,8 @@
 
 
 class DuckDBNamespace(CompliantNamespace["duckdb.Expression"]):  # type: ignore[type-var]
+    _implementation: Implementation = Implementation.DUCKDB
+
     def __init__(
         self: Self, *, backend_version: tuple[int, ...], version: Version
     ) -> None:
diff --git a/narwhals/_duckdb/selectors.py b/narwhals/_duckdb/selectors.py
index 254f41152a..5b4627abac 100644
--- a/narwhals/_duckdb/selectors.py
+++ b/narwhals/_duckdb/selectors.py
@@ -1,134 +1,61 @@
 from __future__ import annotations
 
-import re
 from typing import TYPE_CHECKING
-from typing import Any
-from typing import Callable
-from typing import Iterable
-from typing import Sequence
+from typing import Iterator
 
 from duckdb import ColumnExpression
 
 from narwhals._duckdb.expr import DuckDBExpr
-from narwhals.utils import _parse_time_unit_and_time_zone
-from narwhals.utils import dtype_matches_time_unit_and_time_zone
-from narwhals.utils import get_column_names
-from narwhals.utils import import_dtypes_module
+from narwhals._selectors import CompliantSelector
+from narwhals._selectors import LazySelectorNamespace
 
 if TYPE_CHECKING:
-    from datetime import timezone
-
     import duckdb
     from typing_extensions import Self
 
     from narwhals._duckdb.dataframe import DuckDBLazyFrame
-    from narwhals.dtypes import DType
-    from narwhals.typing import TimeUnit
-    from narwhals.utils import _LimitedContext
-
+    from narwhals._selectors import EvalNames
+    from narwhals._selectors import EvalSeries
+    from narwhals.utils import _FullContext
+
+
+class DuckDBSelectorNamespace(
+    LazySelectorNamespace["DuckDBLazyFrame", "duckdb.Expression"]  # type: ignore[type-var]
+):
+    def _iter_columns(self, df: DuckDBLazyFrame) -> Iterator[duckdb.Expression]:
+        for col in df.columns:
+            yield ColumnExpression(col)
+
+    def _selector(
+        self,
+        context: _FullContext,
+        call: EvalSeries[DuckDBLazyFrame, duckdb.Expression],  # type: ignore[type-var]
+        evaluate_output_names: EvalNames[DuckDBLazyFrame],
+        /,
+    ) -> CompliantSelector[DuckDBLazyFrame, duckdb.Expression]:  # type: ignore[type-var]
+        return DuckDBSelector(
+            call,
+            function_name="selector",
+            evaluate_output_names=evaluate_output_names,
+            alias_output_names=None,
+            backend_version=context._backend_version,
+            version=context._version,
+        )
 
-class DuckDBSelectorNamespace:
-    def __init__(self: Self, context: _LimitedContext, /) -> None:
+    def __init__(self: Self, context: _FullContext, /) -> None:
+        self._implementation = context._implementation
         self._backend_version = context._backend_version
         self._version = context._version
 
-    def by_dtype(self: Self, dtypes: Iterable[DType | type[DType]]) -> DuckDBSelector:
-        def func(df: DuckDBLazyFrame) -> list[duckdb.Expression]:
-            return [
-                ColumnExpression(col) for col in df.columns if df.schema[col] in dtypes
-            ]
-
-        def evaluate_output_names(df: DuckDBLazyFrame) -> Sequence[str]:
-            return [col for col in df.columns if df.schema[col] in dtypes]
-
-        return selector(self, func, evaluate_output_names)
-
-    def matches(self: Self, pattern: str) -> DuckDBSelector:
-        def func(df: DuckDBLazyFrame) -> list[duckdb.Expression]:
-            return [
-                ColumnExpression(col) for col in df.columns if re.search(pattern, col)
-            ]
-
-        def evaluate_output_names(df: DuckDBLazyFrame) -> Sequence[str]:
-            return [col for col in df.columns if re.search(pattern, col)]
-
-        return selector(self, func, evaluate_output_names)
-
-    def numeric(self: Self) -> DuckDBSelector:
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype(
-            {
-                dtypes.Int128,
-                dtypes.Int64,
-                dtypes.Int32,
-                dtypes.Int16,
-                dtypes.Int8,
-                dtypes.UInt128,
-                dtypes.UInt64,
-                dtypes.UInt32,
-                dtypes.UInt16,
-                dtypes.UInt8,
-                dtypes.Float64,
-                dtypes.Float32,
-            },
-        )
-
-    def categorical(self: Self) -> DuckDBSelector:  # pragma: no cover
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype({dtypes.Categorical})
-
-    def string(self: Self) -> DuckDBSelector:
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype({dtypes.String})
-
-    def boolean(self: Self) -> DuckDBSelector:
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype({dtypes.Boolean})
-
-    def all(self: Self) -> DuckDBSelector:
-        def func(df: DuckDBLazyFrame) -> list[duckdb.Expression]:
-            return [ColumnExpression(col) for col in df.columns]
-
-        return selector(self, func, get_column_names)
-
-    def datetime(
-        self: Self,
-        time_unit: TimeUnit | Iterable[TimeUnit] | None,
-        time_zone: str | timezone | Iterable[str | timezone | None] | None,
-    ) -> DuckDBSelector:
-        dtypes = import_dtypes_module(version=self._version)
-        time_units, time_zones = _parse_time_unit_and_time_zone(
-            time_unit=time_unit, time_zone=time_zone
-        )
-
-        def func(df: DuckDBLazyFrame) -> list[duckdb.Expression]:
-            return [
-                ColumnExpression(col)
-                for col in df.columns
-                if dtype_matches_time_unit_and_time_zone(
-                    dtype=df.schema[col],
-                    dtypes=dtypes,
-                    time_units=time_units,
-                    time_zones=time_zones,
-                )
-            ]
 
-        def evaluate_output_names(df: DuckDBLazyFrame) -> Sequence[str]:
-            return [
-                col
-                for col in df.columns
-                if dtype_matches_time_unit_and_time_zone(
-                    dtype=df.schema[col],
-                    dtypes=dtypes,
-                    time_units=time_units,
-                    time_zones=time_zones,
-                )
-            ]
+class DuckDBSelector(  # type: ignore[misc]
+    CompliantSelector["DuckDBLazyFrame", "duckdb.Expression"],  # type: ignore[type-var]
+    DuckDBExpr,
+):
+    @property
+    def selectors(self) -> DuckDBSelectorNamespace:
+        return DuckDBSelectorNamespace(self)
 
-        return selector(self, func, evaluate_output_names)
-
-
-class DuckDBSelector(DuckDBExpr):
     def __repr__(self: Self) -> str:  # pragma: no cover
         return f"DuckDBSelector(function_name={self._function_name})"
 
@@ -141,80 +68,3 @@ def _to_expr(self: Self) -> DuckDBExpr:
             backend_version=self._backend_version,
             version=self._version,
         )
-
-    def __sub__(self: Self, other: DuckDBSelector | Any) -> DuckDBSelector | Any:
-        if isinstance(other, DuckDBSelector):
-
-            def call(df: DuckDBLazyFrame) -> list[duckdb.Expression]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                lhs = self._call(df)
-                return [x for x, name in zip(lhs, lhs_names) if name not in rhs_names]
-
-            def evaluate_output_names(df: DuckDBLazyFrame) -> list[str]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                return [x for x in lhs_names if x not in rhs_names]
-
-            return selector(self, call, evaluate_output_names)
-        else:
-            return self._to_expr() - other
-
-    def __or__(self: Self, other: DuckDBSelector | Any) -> DuckDBSelector | Any:
-        if isinstance(other, DuckDBSelector):
-
-            def call(df: DuckDBLazyFrame) -> list[duckdb.Expression]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                lhs = self._call(df)
-                rhs = other._call(df)
-                return [
-                    *(x for x, name in zip(lhs, lhs_names) if name not in rhs_names),
-                    *rhs,
-                ]
-
-            def evaluate_output_names(df: DuckDBLazyFrame) -> list[str]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                return [*(x for x in lhs_names if x not in rhs_names), *rhs_names]
-
-            return selector(self, call, evaluate_output_names)
-        else:
-            return self._to_expr() | other
-
-    def __and__(self: Self, other: DuckDBSelector | Any) -> DuckDBSelector | Any:
-        if isinstance(other, DuckDBSelector):
-
-            def call(df: DuckDBLazyFrame) -> list[duckdb.Expression]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                lhs = self._call(df)
-                return [x for x, name in zip(lhs, lhs_names) if name in rhs_names]
-
-            def evaluate_output_names(df: DuckDBLazyFrame) -> list[str]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                return [x for x in lhs_names if x in rhs_names]
-
-            return selector(self, call, evaluate_output_names)
-        else:
-            return self._to_expr() & other
-
-    def __invert__(self: Self) -> DuckDBSelector:
-        return DuckDBSelectorNamespace(self).all() - self
-
-
-def selector(
-    context: _LimitedContext,
-    call: Callable[[DuckDBLazyFrame], Sequence[duckdb.Expression]],
-    evaluate_output_names: Callable[[DuckDBLazyFrame], Sequence[str]],
-    /,
-) -> DuckDBSelector:
-    return DuckDBSelector(
-        call,
-        function_name="selector",
-        evaluate_output_names=evaluate_output_names,
-        alias_output_names=None,
-        backend_version=context._backend_version,
-        version=context._version,
-    )

From d785eb395abfd26e4360f575285d89bd9fc46371 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 23:44:45 +0000
Subject: [PATCH 21/55] fix: guard against back-compat `duckdb`

https://github.com/narwhals-dev/narwhals/actions/runs/13467042968/job/37634856612?pr=2064
---
 narwhals/_selectors.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index 37429bdc70..47b71cec3b 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -114,8 +114,8 @@ def matches(self: Self, pattern: str) -> CompliantSelector[FrameT, SeriesT]:
         p = re.compile(pattern)
 
         def series(df: FrameT) -> Sequence[SeriesT]:
-            # NOTE: Possibly cheaper than lazyframe?
-            if is_compliant_dataframe(df):
+            # NOTE: https://github.com/narwhals-dev/narwhals/actions/runs/13467042968/job/37634856612?pr=2064
+            if is_compliant_dataframe(df) and not self._implementation.is_duckdb():
                 return [df.get_column(col) for col in df.columns if p.search(col)]
 
             return [ser for ser, name in self._iter_columns_names(df) if p.search(name)]

From 90229c528350948f8d6d3d11d202b0803a23b1e1 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 23:46:35 +0000
Subject: [PATCH 22/55] feat: reimplement `_spark_like.selectors`

---
 narwhals/_spark_like/selectors.py | 214 +++++-------------------------
 1 file changed, 31 insertions(+), 183 deletions(-)

diff --git a/narwhals/_spark_like/selectors.py b/narwhals/_spark_like/selectors.py
index 95ad0407d1..1115dee13c 100644
--- a/narwhals/_spark_like/selectors.py
+++ b/narwhals/_spark_like/selectors.py
@@ -1,129 +1,55 @@
 from __future__ import annotations
 
-import re
 from typing import TYPE_CHECKING
-from typing import Any
-from typing import Callable
-from typing import Iterable
-from typing import Sequence
+from typing import Iterator
 
+from narwhals._selectors import CompliantSelector
+from narwhals._selectors import LazySelectorNamespace
 from narwhals._spark_like.expr import SparkLikeExpr
-from narwhals.utils import _parse_time_unit_and_time_zone
-from narwhals.utils import dtype_matches_time_unit_and_time_zone
-from narwhals.utils import get_column_names
-from narwhals.utils import import_dtypes_module
 
 if TYPE_CHECKING:
-    from datetime import timezone
-
     from pyspark.sql import Column
     from typing_extensions import Self
 
+    from narwhals._selectors import EvalNames
+    from narwhals._selectors import EvalSeries
     from narwhals._spark_like.dataframe import SparkLikeLazyFrame
-    from narwhals.dtypes import DType
-    from narwhals.typing import TimeUnit
     from narwhals.utils import _FullContext
 
 
-class SparkLikeSelectorNamespace:
-    def __init__(self: Self, context: _FullContext, /) -> None:
-        self._backend_version = context._backend_version
-        self._version = context._version
-        self._implementation = context._implementation
-
-    def by_dtype(self: Self, dtypes: Iterable[DType | type[DType]]) -> SparkLikeSelector:
-        def func(df: SparkLikeLazyFrame) -> list[Column]:
-            return [df._F.col(col) for col in df.columns if df.schema[col] in dtypes]
-
-        def evaluate_output_names(df: SparkLikeLazyFrame) -> Sequence[str]:
-            return [col for col in df.columns if df.schema[col] in dtypes]
-
-        return selector(self, func, evaluate_output_names)
-
-    def matches(self: Self, pattern: str) -> SparkLikeSelector:
-        def func(df: SparkLikeLazyFrame) -> list[Column]:
-            return [df._F.col(col) for col in df.columns if re.search(pattern, col)]
-
-        def evaluate_output_names(df: SparkLikeLazyFrame) -> Sequence[str]:
-            return [col for col in df.columns if re.search(pattern, col)]
+class SparkLikeSelectorNamespace(LazySelectorNamespace["SparkLikeLazyFrame", "Column"]):
+    def _iter_columns(self, df: SparkLikeLazyFrame) -> Iterator[Column]:
+        for col in df.columns:
+            yield df._F.col(col)
 
-        return selector(self, func, evaluate_output_names)
-
-    def numeric(self: Self) -> SparkLikeSelector:
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype(
-            {
-                dtypes.Int128,
-                dtypes.Int64,
-                dtypes.Int32,
-                dtypes.Int16,
-                dtypes.Int8,
-                dtypes.UInt128,
-                dtypes.UInt64,
-                dtypes.UInt32,
-                dtypes.UInt16,
-                dtypes.UInt8,
-                dtypes.Float64,
-                dtypes.Float32,
-            },
-        )
-
-    def categorical(self: Self) -> SparkLikeSelector:
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype({dtypes.Categorical})
-
-    def string(self: Self) -> SparkLikeSelector:
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype({dtypes.String})
-
-    def boolean(self: Self) -> SparkLikeSelector:
-        dtypes = import_dtypes_module(self._version)
-        return self.by_dtype({dtypes.Boolean})
-
-    def all(self: Self) -> SparkLikeSelector:
-        def func(df: SparkLikeLazyFrame) -> list[Column]:
-            return [df._F.col(col) for col in df.columns]
-
-        return selector(self, func, get_column_names)
-
-    def datetime(
-        self: Self,
-        time_unit: TimeUnit | Iterable[TimeUnit] | None,
-        time_zone: str | timezone | Iterable[str | timezone | None] | None,
+    def _selector(
+        self,
+        context: _FullContext,
+        call: EvalSeries[SparkLikeLazyFrame, Column],
+        evaluate_output_names: EvalNames[SparkLikeLazyFrame],
+        /,
     ) -> SparkLikeSelector:
-        dtypes = import_dtypes_module(version=self._version)
-        time_units, time_zones = _parse_time_unit_and_time_zone(
-            time_unit=time_unit, time_zone=time_zone
+        return SparkLikeSelector(
+            call,
+            function_name="selector",
+            evaluate_output_names=evaluate_output_names,
+            alias_output_names=None,
+            backend_version=context._backend_version,
+            version=context._version,
+            implementation=context._implementation,
         )
 
-        def func(df: SparkLikeLazyFrame) -> list[Column]:
-            return [
-                df._F.col(col)
-                for col in df.columns
-                if dtype_matches_time_unit_and_time_zone(
-                    dtype=df.schema[col],
-                    dtypes=dtypes,
-                    time_units=time_units,
-                    time_zones=time_zones,
-                )
-            ]
-
-        def evaluate_output_names(df: SparkLikeLazyFrame) -> Sequence[str]:
-            return [
-                col
-                for col in df.columns
-                if dtype_matches_time_unit_and_time_zone(
-                    dtype=df.schema[col],
-                    dtypes=dtypes,
-                    time_units=time_units,
-                    time_zones=time_zones,
-                )
-            ]
+    def __init__(self: Self, context: _FullContext, /) -> None:
+        self._backend_version = context._backend_version
+        self._version = context._version
+        self._implementation = context._implementation
 
-        return selector(self, func, evaluate_output_names)
 
+class SparkLikeSelector(CompliantSelector["SparkLikeLazyFrame", "Column"], SparkLikeExpr):  # type: ignore[misc]
+    @property
+    def selectors(self: Self) -> SparkLikeSelectorNamespace:
+        return SparkLikeSelectorNamespace(self)
 
-class SparkLikeSelector(SparkLikeExpr):
     def __repr__(self: Self) -> str:  # pragma: no cover
         return f"SparkLikeSelector(function_name={self._function_name})"
 
@@ -137,81 +63,3 @@ def _to_expr(self: Self) -> SparkLikeExpr:
             version=self._version,
             implementation=self._implementation,
         )
-
-    def __sub__(self: Self, other: SparkLikeSelector | Any) -> SparkLikeSelector | Any:
-        if isinstance(other, SparkLikeSelector):
-
-            def call(df: SparkLikeLazyFrame) -> list[Column]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                lhs = self._call(df)
-                return [x for x, name in zip(lhs, lhs_names) if name not in rhs_names]
-
-            def evaluate_output_names(df: SparkLikeLazyFrame) -> list[str]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                return [x for x in lhs_names if x not in rhs_names]
-
-            return selector(self, call, evaluate_output_names)
-        else:
-            return self._to_expr() - other
-
-    def __or__(self: Self, other: SparkLikeSelector | Any) -> SparkLikeSelector | Any:
-        if isinstance(other, SparkLikeSelector):
-
-            def call(df: SparkLikeLazyFrame) -> list[Column]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                lhs = self._call(df)
-                rhs = other._call(df)
-                return [
-                    *(x for x, name in zip(lhs, lhs_names) if name not in rhs_names),
-                    *rhs,
-                ]
-
-            def evaluate_output_names(df: SparkLikeLazyFrame) -> list[str]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                return [*(x for x in lhs_names if x not in rhs_names), *rhs_names]
-
-            return selector(self, call, evaluate_output_names)
-        else:
-            return self._to_expr() | other
-
-    def __and__(self: Self, other: SparkLikeSelector | Any) -> SparkLikeSelector | Any:
-        if isinstance(other, SparkLikeSelector):
-
-            def call(df: SparkLikeLazyFrame) -> list[Column]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                lhs = self._call(df)
-                return [x for x, name in zip(lhs, lhs_names) if name in rhs_names]
-
-            def evaluate_output_names(df: SparkLikeLazyFrame) -> list[str]:
-                lhs_names = self._evaluate_output_names(df)
-                rhs_names = other._evaluate_output_names(df)
-                return [x for x in lhs_names if x in rhs_names]
-
-            return selector(self, call, evaluate_output_names)
-        else:
-            return self._to_expr() & other
-
-    def __invert__(self: Self) -> SparkLikeSelector:
-        return SparkLikeSelectorNamespace(self).all() - self
-
-
-def selector(
-    context: _FullContext,
-    call: Callable[[SparkLikeLazyFrame], Sequence[Column]],
-    evaluate_output_names: Callable[[SparkLikeLazyFrame], Sequence[str]],
-    /,
-) -> SparkLikeSelector:
-    return SparkLikeSelector(
-        call,
-        function_name="selector",
-        evaluate_output_names=evaluate_output_names,
-        alias_output_names=None,
-        backend_version=context._backend_version,
-        version=context._version,
-        implementation=context._implementation,
-    )

From 180b9a341f06729707052c39217660aa55e3a40c Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Fri, 21 Feb 2025 23:47:46 +0000
Subject: [PATCH 23/55] chore: remove duplicate import

---
 narwhals/_dask/selectors.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/narwhals/_dask/selectors.py b/narwhals/_dask/selectors.py
index ee85158758..7a9ffaa594 100644
--- a/narwhals/_dask/selectors.py
+++ b/narwhals/_dask/selectors.py
@@ -3,7 +3,6 @@
 from typing import TYPE_CHECKING
 from typing import Iterator
 
-from narwhals._dask.dataframe import DaskLazyFrame
 from narwhals._dask.expr import DaskExpr
 from narwhals._selectors import CompliantSelector
 from narwhals._selectors import LazySelectorNamespace

From bfed23f58f2343eeaadd7ec6036fe9178a31f257 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sat, 22 Feb 2025 10:14:56 +0000
Subject: [PATCH 24/55] chore(typing): ignore valid `_dask.selectors` warnings

- Only showing locally when `dask` is installed
- Hoping it doesn't show in CI as unused
- Can't fix until #2044
---
 narwhals/_dask/selectors.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/narwhals/_dask/selectors.py b/narwhals/_dask/selectors.py
index 7a9ffaa594..4b797a6c87 100644
--- a/narwhals/_dask/selectors.py
+++ b/narwhals/_dask/selectors.py
@@ -26,7 +26,7 @@
         import dask_expr as dx
 
 
-class DaskSelectorNamespace(LazySelectorNamespace["DaskLazyFrame", "dx.Series"]):
+class DaskSelectorNamespace(LazySelectorNamespace["DaskLazyFrame", "dx.Series"]):  # pyright: ignore[reportInvalidTypeArguments]
     def _iter_columns(self, df: DaskLazyFrame) -> Iterator[dx.Series]:
         for _col, ser in df._native_frame.items():  # noqa: PERF102
             yield ser
@@ -34,10 +34,10 @@ def _iter_columns(self, df: DaskLazyFrame) -> Iterator[dx.Series]:
     def _selector(
         self,
         context: _FullContext,
-        call: EvalSeries[DaskLazyFrame, dx.Series],
+        call: EvalSeries[DaskLazyFrame, dx.Series],  # pyright: ignore[reportInvalidTypeForm]
         evaluate_output_names: EvalNames[DaskLazyFrame],
         /,
-    ) -> CompliantSelector[DaskLazyFrame, dx.Series]:
+    ) -> CompliantSelector[DaskLazyFrame, dx.Series]:  # pyright: ignore[reportInvalidTypeArguments]
         return DaskSelector(
             call,
             depth=0,

From f060f220704d2e27ba8010360da92d0bea684588 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sat, 22 Feb 2025 11:02:00 +0000
Subject: [PATCH 25/55] refactor: define `CompliantSelector.__repr__`

Makes each backend simpler, and avoids typos like in `PandasSelector.__repr__`
---
 narwhals/_arrow/selectors.py       | 3 ---
 narwhals/_dask/selectors.py        | 3 ---
 narwhals/_duckdb/selectors.py      | 3 ---
 narwhals/_pandas_like/selectors.py | 5 -----
 narwhals/_selectors.py             | 6 +++++-
 narwhals/_spark_like/selectors.py  | 3 ---
 narwhals/utils.py                  | 9 +++++++++
 7 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/narwhals/_arrow/selectors.py b/narwhals/_arrow/selectors.py
index 408e753d55..7c7f98b796 100644
--- a/narwhals/_arrow/selectors.py
+++ b/narwhals/_arrow/selectors.py
@@ -54,9 +54,6 @@ class ArrowSelector(CompliantSelector["ArrowDataFrame", "ArrowSeries"], ArrowExp
     def selectors(self) -> ArrowSelectorNamespace:
         return ArrowSelectorNamespace(self)
 
-    def __repr__(self: Self) -> str:  # pragma: no cover
-        return f"ArrowSelector(depth={self._depth}, function_name={self._function_name})"
-
     def _to_expr(self: Self) -> ArrowExpr:
         return ArrowExpr(
             self._call,
diff --git a/narwhals/_dask/selectors.py b/narwhals/_dask/selectors.py
index 4b797a6c87..28c1a18e93 100644
--- a/narwhals/_dask/selectors.py
+++ b/narwhals/_dask/selectors.py
@@ -59,9 +59,6 @@ class DaskSelector(CompliantSelector["DaskLazyFrame", "dx.Series"], DaskExpr):
     def selectors(self) -> DaskSelectorNamespace:
         return DaskSelectorNamespace(self)
 
-    def __repr__(self: Self) -> str:  # pragma: no cover
-        return f"DaskSelector(depth={self._depth}, function_name={self._function_name})"
-
     def _to_expr(self: Self) -> DaskExpr:
         return DaskExpr(
             self._call,
diff --git a/narwhals/_duckdb/selectors.py b/narwhals/_duckdb/selectors.py
index 5b4627abac..8ff21829eb 100644
--- a/narwhals/_duckdb/selectors.py
+++ b/narwhals/_duckdb/selectors.py
@@ -56,9 +56,6 @@ class DuckDBSelector(  # type: ignore[misc]
     def selectors(self) -> DuckDBSelectorNamespace:
         return DuckDBSelectorNamespace(self)
 
-    def __repr__(self: Self) -> str:  # pragma: no cover
-        return f"DuckDBSelector(function_name={self._function_name})"
-
     def _to_expr(self: Self) -> DuckDBExpr:
         return DuckDBExpr(
             self._call,
diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py
index d764b80751..49095106ec 100644
--- a/narwhals/_pandas_like/selectors.py
+++ b/narwhals/_pandas_like/selectors.py
@@ -69,11 +69,6 @@ class PandasSelector(  # type: ignore[misc]
     def selectors(self) -> PandasSelectorNamespace:
         return PandasSelectorNamespace(self)
 
-    def __repr__(self) -> str:  # pragma: no cover
-        return (
-            f"PandasSelector(depth={self._depth}, function_name={self._function_name}, "
-        )
-
     def _to_expr(self: Self) -> PandasLikeExpr:
         return PandasLikeExpr(
             self._call,
diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index 47b71cec3b..cfd71cdcaa 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -24,6 +24,7 @@
 from narwhals.utils import get_column_names
 from narwhals.utils import import_dtypes_module
 from narwhals.utils import is_compliant_dataframe
+from narwhals.utils import is_tracks_depth
 
 if TYPE_CHECKING:
     from datetime import timezone
@@ -192,7 +193,6 @@ def _iter_columns_dtypes(self, df: FrameT, /) -> Iterator[tuple[SeriesT, DType]]
 class CompliantSelector(CompliantExpr[SeriesT], Generic[FrameT, SeriesT], Protocol):
     @property
     def selectors(self) -> CompliantSelectorNamespace[FrameT, SeriesT]: ...
-    def __repr__(self: Self) -> str: ...
     def _to_expr(self: Self) -> CompliantExpr[SeriesT]: ...
 
     def _is_selector(
@@ -271,6 +271,10 @@ def names(df: FrameT) -> Sequence[str]:
     def __invert__(self: Self) -> CompliantSelector[FrameT, SeriesT]:
         return self.selectors.all() - self
 
+    def __repr__(self: Self) -> str:  # pragma: no cover
+        s = f"depth={self._depth}, " if is_tracks_depth(self._implementation) else ""
+        return f"{type(self).__name__}({s}function_name={self._function_name})"
+
 
 # NOTE: Should probably be a `DataFrame` method
 # Using `Expr` because this doesn't require `Selector` attrs/methods
diff --git a/narwhals/_spark_like/selectors.py b/narwhals/_spark_like/selectors.py
index 1115dee13c..631419b061 100644
--- a/narwhals/_spark_like/selectors.py
+++ b/narwhals/_spark_like/selectors.py
@@ -50,9 +50,6 @@ class SparkLikeSelector(CompliantSelector["SparkLikeLazyFrame", "Column"], Spark
     def selectors(self: Self) -> SparkLikeSelectorNamespace:
         return SparkLikeSelectorNamespace(self)
 
-    def __repr__(self: Self) -> str:  # pragma: no cover
-        return f"SparkLikeSelector(function_name={self._function_name})"
-
     def _to_expr(self: Self) -> SparkLikeExpr:
         return SparkLikeExpr(
             self._call,
diff --git a/narwhals/utils.py b/narwhals/utils.py
index bddc9ad1cd..6b526e0860 100644
--- a/narwhals/utils.py
+++ b/narwhals/utils.py
@@ -10,6 +10,7 @@
 from typing import TYPE_CHECKING
 from typing import Any
 from typing import Iterable
+from typing import Literal
 from typing import Sequence
 from typing import TypeVar
 from typing import Union
@@ -45,6 +46,7 @@
 
     import pandas as pd
     from typing_extensions import Self
+    from typing_extensions import TypeAlias
     from typing_extensions import TypeIs
 
     from narwhals.dataframe import DataFrame
@@ -72,6 +74,8 @@
     _T2 = TypeVar("_T2")
     _T3 = TypeVar("_T3")
 
+    _TracksDepth: TypeAlias = "Literal[Implementation.DASK,Implementation.CUDF,Implementation.MODIN,Implementation.PANDAS,Implementation.PYSPARK]"
+
     class _SupportsVersion(Protocol):
         __version__: str
 
@@ -1339,3 +1343,8 @@ def has_native_namespace(obj: Any) -> TypeIs[SupportsNativeNamespace]:
 
 def _supports_dataframe_interchange(obj: Any) -> TypeIs[DataFrameLike]:
     return hasattr(obj, "__dataframe__")
+
+
+def is_tracks_depth(obj: Implementation, /) -> TypeIs[_TracksDepth]:  # pragma: no cover
+    """Return `True` for implementations that utilize `CompliantExpr._depth`."""
+    return obj.is_pandas_like() or obj in {Implementation.PYARROW, Implementation.DASK}

From 3c5ea87d268dbb03633232e289e61c4f4c837543 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sat, 22 Feb 2025 11:09:58 +0000
Subject: [PATCH 26/55] refactor(typing): always use concrete class in
 `_selector() -> ...`

- `SparkLikeSelector` already has it
- No need to keep these generic, after specializing
---
 narwhals/_arrow/selectors.py       | 2 +-
 narwhals/_dask/selectors.py        | 2 +-
 narwhals/_duckdb/selectors.py      | 2 +-
 narwhals/_pandas_like/selectors.py | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/narwhals/_arrow/selectors.py b/narwhals/_arrow/selectors.py
index 7c7f98b796..ad5f74dfeb 100644
--- a/narwhals/_arrow/selectors.py
+++ b/narwhals/_arrow/selectors.py
@@ -32,7 +32,7 @@ def _selector(
         call: EvalSeries[ArrowDataFrame, ArrowSeries],
         evaluate_output_names: EvalNames[ArrowDataFrame],
         /,
-    ) -> CompliantSelector[ArrowDataFrame, ArrowSeries]:
+    ) -> ArrowSelector:
         return ArrowSelector(
             call,
             depth=0,
diff --git a/narwhals/_dask/selectors.py b/narwhals/_dask/selectors.py
index 28c1a18e93..08fc62ea81 100644
--- a/narwhals/_dask/selectors.py
+++ b/narwhals/_dask/selectors.py
@@ -37,7 +37,7 @@ def _selector(
         call: EvalSeries[DaskLazyFrame, dx.Series],  # pyright: ignore[reportInvalidTypeForm]
         evaluate_output_names: EvalNames[DaskLazyFrame],
         /,
-    ) -> CompliantSelector[DaskLazyFrame, dx.Series]:  # pyright: ignore[reportInvalidTypeArguments]
+    ) -> DaskSelector:
         return DaskSelector(
             call,
             depth=0,
diff --git a/narwhals/_duckdb/selectors.py b/narwhals/_duckdb/selectors.py
index 8ff21829eb..1eef4a2a17 100644
--- a/narwhals/_duckdb/selectors.py
+++ b/narwhals/_duckdb/selectors.py
@@ -32,7 +32,7 @@ def _selector(
         call: EvalSeries[DuckDBLazyFrame, duckdb.Expression],  # type: ignore[type-var]
         evaluate_output_names: EvalNames[DuckDBLazyFrame],
         /,
-    ) -> CompliantSelector[DuckDBLazyFrame, duckdb.Expression]:  # type: ignore[type-var]
+    ) -> DuckDBSelector:
         return DuckDBSelector(
             call,
             function_name="selector",
diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py
index 49095106ec..407b33a6ae 100644
--- a/narwhals/_pandas_like/selectors.py
+++ b/narwhals/_pandas_like/selectors.py
@@ -44,7 +44,7 @@ def _selector(
         call: EvalSeries[PandasLikeDataFrame, PandasLikeSeries],
         evaluate_output_names: EvalNames[PandasLikeDataFrame],
         /,
-    ) -> CompliantSelector[PandasLikeDataFrame, PandasLikeSeries]:
+    ) -> PandasSelector:
         return PandasSelector(
             call,
             depth=0,

From b30f020545da872aa3b753e5314482dfc965a012 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sat, 22 Feb 2025 11:28:27 +0000
Subject: [PATCH 27/55] chore: move comments to discussion threads

---
 narwhals/_pandas_like/selectors.py |  3 ---
 narwhals/_selectors.py             | 16 +---------------
 2 files changed, 1 insertion(+), 18 deletions(-)

diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py
index 407b33a6ae..311c37bfca 100644
--- a/narwhals/_pandas_like/selectors.py
+++ b/narwhals/_pandas_like/selectors.py
@@ -32,9 +32,6 @@ def _iter_columns(self, df: PandasLikeDataFrame) -> Iterator[PandasLikeSeries]:
             backend_version=df._backend_version,
             version=df._version,
         )
-        # NOTE: (PERF102) is a false-positive
-        # .items() -> (str, pd.Series)
-        # .values() -> np.ndarray
         for _col, ser in df._native_frame.items():  # noqa: PERF102
             yield series(ser)
 
diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index cfd71cdcaa..f8643bced6 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -1,7 +1,4 @@
-"""Almost entirely complete, generic `selectors` implementation.
-
-- Focusing on eager-only for now
-"""
+"""Almost entirely complete, generic `selectors` implementation."""
 
 from __future__ import annotations
 
@@ -42,7 +39,6 @@
     from narwhals.utils import Version
     from narwhals.utils import _FullContext
 
-    # NOTE: Plugging the gap of this not being defined in `CompliantSeries`
     class CompliantSeriesWithDType(CompliantSeries, Protocol):
         @property
         def dtype(self) -> DType: ...
@@ -55,15 +51,11 @@ def dtype(self) -> DType: ...
 EvalNames: TypeAlias = Callable[[FrameT], Sequence[str]]
 
 
-# NOTE: Pretty much finished generic for eager backends
 class CompliantSelectorNamespace(Generic[FrameT, SeriesT], Protocol):
     _implementation: Implementation
     _backend_version: tuple[int, ...]
     _version: Version
 
-    # TODO @dangotbanned: push for adding to public API for `DataFrame`
-    # Only need internally, but it plugs so many holes that it must be useful beyond that
-    # https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.iter_columns.html
     def _iter_columns(self, df: FrameT, /) -> Iterator[SeriesT]: ...
     def _iter_schema(self, df: FrameT, /) -> Iterator[tuple[str, DType]]:
         for ser in self._iter_columns(df):
@@ -84,9 +76,6 @@ def _selector(
         /,
     ) -> CompliantSelector[FrameT, SeriesT]: ...
 
-    # NOTE: `.dtype` won't return a `nw.DType` (or maybe anything) for lazy backends
-    # - Their `SeriesT` is a native object
-    # - See (https://github.com/narwhals-dev/narwhals/issues/2044)
     def _is_dtype(
         self: CompliantSelectorNamespace[FrameT, SeriesT], dtype: type[DType], /
     ) -> CompliantSelector[FrameT, SeriesT]:
@@ -115,7 +104,6 @@ def matches(self: Self, pattern: str) -> CompliantSelector[FrameT, SeriesT]:
         p = re.compile(pattern)
 
         def series(df: FrameT) -> Sequence[SeriesT]:
-            # NOTE: https://github.com/narwhals-dev/narwhals/actions/runs/13467042968/job/37634856612?pr=2064
             if is_compliant_dataframe(df) and not self._implementation.is_duckdb():
                 return [df.get_column(col) for col in df.columns if p.search(col)]
 
@@ -276,8 +264,6 @@ def __repr__(self: Self) -> str:  # pragma: no cover
         return f"{type(self).__name__}({s}function_name={self._function_name})"
 
 
-# NOTE: Should probably be a `DataFrame` method
-# Using `Expr` because this doesn't require `Selector` attrs/methods
 def _eval_lhs_rhs(
     df: CompliantDataFrame | CompliantLazyFrame, lhs: CompliantExpr, rhs: CompliantExpr
 ) -> tuple[Sequence[str], Sequence[str]]:

From eaa5b1c28ac8a93003825eed78698a22e7bcebab Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sat, 22 Feb 2025 11:30:56 +0000
Subject: [PATCH 28/55] style: order w/ unimplemented methods first

---
 narwhals/_selectors.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index f8643bced6..e81dc44074 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -56,7 +56,16 @@ class CompliantSelectorNamespace(Generic[FrameT, SeriesT], Protocol):
     _backend_version: tuple[int, ...]
     _version: Version
 
+    def _selector(
+        self,
+        context: _FullContext,
+        call: EvalSeries[FrameT, SeriesT],
+        evaluate_output_names: EvalNames[FrameT],
+        /,
+    ) -> CompliantSelector[FrameT, SeriesT]: ...
+
     def _iter_columns(self, df: FrameT, /) -> Iterator[SeriesT]: ...
+
     def _iter_schema(self, df: FrameT, /) -> Iterator[tuple[str, DType]]:
         for ser in self._iter_columns(df):
             yield ser.name, ser.dtype
@@ -68,14 +77,6 @@ def _iter_columns_dtypes(self, df: FrameT, /) -> Iterator[tuple[SeriesT, DType]]
     def _iter_columns_names(self, df: FrameT, /) -> Iterator[tuple[SeriesT, str]]:
         yield from zip(self._iter_columns(df), df.columns)
 
-    def _selector(
-        self,
-        context: _FullContext,
-        call: EvalSeries[FrameT, SeriesT],
-        evaluate_output_names: EvalNames[FrameT],
-        /,
-    ) -> CompliantSelector[FrameT, SeriesT]: ...
-
     def _is_dtype(
         self: CompliantSelectorNamespace[FrameT, SeriesT], dtype: type[DType], /
     ) -> CompliantSelector[FrameT, SeriesT]:

From 92a96cd416d98553051454b0f2957ee98c25d0f0 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sat, 22 Feb 2025 11:37:07 +0000
Subject: [PATCH 29/55] chore: ignore `DAR201` on `is_tracks_depth` docstring

narwhals/utils.py:1349:1: DAR201 Missing "Returns" in Docstring: - return

https://results.pre-commit.ci/run/github/760058710/1740223863.-EOmgrTQQ7qnlq_vh8nmLw
---
 narwhals/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/narwhals/utils.py b/narwhals/utils.py
index 6b526e0860..18cd2ce3d3 100644
--- a/narwhals/utils.py
+++ b/narwhals/utils.py
@@ -1346,5 +1346,5 @@ def _supports_dataframe_interchange(obj: Any) -> TypeIs[DataFrameLike]:
 
 
 def is_tracks_depth(obj: Implementation, /) -> TypeIs[_TracksDepth]:  # pragma: no cover
-    """Return `True` for implementations that utilize `CompliantExpr._depth`."""
+    """Return `True` for implementations that utilize `CompliantExpr._depth`."""  # flake8: noqa
     return obj.is_pandas_like() or obj in {Implementation.PYARROW, Implementation.DASK}

From 29eb2d02a27c9c61ba75fef8e689711aaf252b76 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sat, 22 Feb 2025 11:39:25 +0000
Subject: [PATCH 30/55] chore: swap docstring for comment to satisfy pre-commit

---
 narwhals/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/narwhals/utils.py b/narwhals/utils.py
index 18cd2ce3d3..1124004771 100644
--- a/narwhals/utils.py
+++ b/narwhals/utils.py
@@ -1346,5 +1346,5 @@ def _supports_dataframe_interchange(obj: Any) -> TypeIs[DataFrameLike]:
 
 
 def is_tracks_depth(obj: Implementation, /) -> TypeIs[_TracksDepth]:  # pragma: no cover
-    """Return `True` for implementations that utilize `CompliantExpr._depth`."""  # flake8: noqa
+    # Return `True` for implementations that utilize `CompliantExpr._depth`.
     return obj.is_pandas_like() or obj in {Implementation.PYARROW, Implementation.DASK}

From 547d62a5bf6049fc90a259a35f46afbf2505d9ad Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sat, 22 Feb 2025 12:52:15 +0000
Subject: [PATCH 31/55] feat: reuse & define `CompliantNamespace.selectors`

Typing correctly will require (#2053), so leaving it out for now
---
 narwhals/_arrow/selectors.py       | 4 ----
 narwhals/_dask/selectors.py        | 4 ----
 narwhals/_duckdb/selectors.py      | 4 ----
 narwhals/_pandas_like/selectors.py | 4 ----
 narwhals/_selectors.py             | 4 +++-
 narwhals/_spark_like/selectors.py  | 4 ----
 narwhals/typing.py                 | 3 +++
 7 files changed, 6 insertions(+), 21 deletions(-)

diff --git a/narwhals/_arrow/selectors.py b/narwhals/_arrow/selectors.py
index ad5f74dfeb..6772b7b365 100644
--- a/narwhals/_arrow/selectors.py
+++ b/narwhals/_arrow/selectors.py
@@ -50,10 +50,6 @@ def __init__(self: Self, context: _FullContext, /) -> None:
 
 
 class ArrowSelector(CompliantSelector["ArrowDataFrame", "ArrowSeries"], ArrowExpr):  # type: ignore[misc]
-    @property
-    def selectors(self) -> ArrowSelectorNamespace:
-        return ArrowSelectorNamespace(self)
-
     def _to_expr(self: Self) -> ArrowExpr:
         return ArrowExpr(
             self._call,
diff --git a/narwhals/_dask/selectors.py b/narwhals/_dask/selectors.py
index 08fc62ea81..8c55b3e98d 100644
--- a/narwhals/_dask/selectors.py
+++ b/narwhals/_dask/selectors.py
@@ -55,10 +55,6 @@ def __init__(self: Self, context: _FullContext, /) -> None:
 
 
 class DaskSelector(CompliantSelector["DaskLazyFrame", "dx.Series"], DaskExpr):  # type: ignore[misc]
-    @property
-    def selectors(self) -> DaskSelectorNamespace:
-        return DaskSelectorNamespace(self)
-
     def _to_expr(self: Self) -> DaskExpr:
         return DaskExpr(
             self._call,
diff --git a/narwhals/_duckdb/selectors.py b/narwhals/_duckdb/selectors.py
index 1eef4a2a17..732cc3c6ec 100644
--- a/narwhals/_duckdb/selectors.py
+++ b/narwhals/_duckdb/selectors.py
@@ -52,10 +52,6 @@ class DuckDBSelector(  # type: ignore[misc]
     CompliantSelector["DuckDBLazyFrame", "duckdb.Expression"],  # type: ignore[type-var]
     DuckDBExpr,
 ):
-    @property
-    def selectors(self) -> DuckDBSelectorNamespace:
-        return DuckDBSelectorNamespace(self)
-
     def _to_expr(self: Self) -> DuckDBExpr:
         return DuckDBExpr(
             self._call,
diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py
index 311c37bfca..43e7f10258 100644
--- a/narwhals/_pandas_like/selectors.py
+++ b/narwhals/_pandas_like/selectors.py
@@ -62,10 +62,6 @@ def __init__(self: Self, context: _FullContext, /) -> None:
 class PandasSelector(  # type: ignore[misc]
     CompliantSelector["PandasLikeDataFrame", "PandasLikeSeries"], PandasLikeExpr
 ):
-    @property
-    def selectors(self) -> PandasSelectorNamespace:
-        return PandasSelectorNamespace(self)
-
     def _to_expr(self: Self) -> PandasLikeExpr:
         return PandasLikeExpr(
             self._call,
diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index e81dc44074..53cc3b1689 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -181,7 +181,9 @@ def _iter_columns_dtypes(self, df: FrameT, /) -> Iterator[tuple[SeriesT, DType]]
 
 class CompliantSelector(CompliantExpr[SeriesT], Generic[FrameT, SeriesT], Protocol):
     @property
-    def selectors(self) -> CompliantSelectorNamespace[FrameT, SeriesT]: ...
+    def selectors(self) -> CompliantSelectorNamespace[FrameT, SeriesT]:
+        return self.__narwhals_namespace__().selectors
+
     def _to_expr(self: Self) -> CompliantExpr[SeriesT]: ...
 
     def _is_selector(
diff --git a/narwhals/_spark_like/selectors.py b/narwhals/_spark_like/selectors.py
index 631419b061..928a803b39 100644
--- a/narwhals/_spark_like/selectors.py
+++ b/narwhals/_spark_like/selectors.py
@@ -46,10 +46,6 @@ def __init__(self: Self, context: _FullContext, /) -> None:
 
 
 class SparkLikeSelector(CompliantSelector["SparkLikeLazyFrame", "Column"], SparkLikeExpr):  # type: ignore[misc]
-    @property
-    def selectors(self: Self) -> SparkLikeSelectorNamespace:
-        return SparkLikeSelectorNamespace(self)
-
     def _to_expr(self: Self) -> SparkLikeExpr:
         return SparkLikeExpr(
             self._call,
diff --git a/narwhals/typing.py b/narwhals/typing.py
index 3b7075cc06..f6c478f0ea 100644
--- a/narwhals/typing.py
+++ b/narwhals/typing.py
@@ -20,6 +20,7 @@
 
     from narwhals import dtypes
     from narwhals._expression_parsing import ExprKind
+    from narwhals._selectors import CompliantSelectorNamespace
     from narwhals.dataframe import DataFrame
     from narwhals.dataframe import LazyFrame
     from narwhals.dtypes import DType
@@ -129,6 +130,8 @@ def col(self, *column_names: str) -> CompliantExpr[CompliantSeriesT_co]: ...
     def lit(
         self, value: Any, dtype: DType | None
     ) -> CompliantExpr[CompliantSeriesT_co]: ...
+    @property
+    def selectors(self) -> CompliantSelectorNamespace[Any, Any]: ...
 
 
 class SupportsNativeNamespace(Protocol):

From e6f2d072ffd0de5e5d1d680fcc555cf7a21fdb19 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sat, 22 Feb 2025 13:45:27 +0000
Subject: [PATCH 32/55] refactor: use `self` not `context`

Only added in #2057, but not needed now this is a method
---
 narwhals/_arrow/selectors.py       |  5 ++---
 narwhals/_dask/selectors.py        |  5 ++---
 narwhals/_duckdb/selectors.py      |  5 ++---
 narwhals/_pandas_like/selectors.py |  7 +++----
 narwhals/_selectors.py             | 19 +++++++++----------
 narwhals/_spark_like/selectors.py  |  7 +++----
 6 files changed, 21 insertions(+), 27 deletions(-)

diff --git a/narwhals/_arrow/selectors.py b/narwhals/_arrow/selectors.py
index 6772b7b365..8cdc379b2d 100644
--- a/narwhals/_arrow/selectors.py
+++ b/narwhals/_arrow/selectors.py
@@ -28,7 +28,6 @@ def _iter_columns(self, df: ArrowDataFrame) -> Iterator[ArrowSeries]:
 
     def _selector(
         self,
-        context: _FullContext,
         call: EvalSeries[ArrowDataFrame, ArrowSeries],
         evaluate_output_names: EvalNames[ArrowDataFrame],
         /,
@@ -39,8 +38,8 @@ def _selector(
             function_name="selector",
             evaluate_output_names=evaluate_output_names,
             alias_output_names=None,
-            backend_version=context._backend_version,
-            version=context._version,
+            backend_version=self._backend_version,
+            version=self._version,
         )
 
     def __init__(self: Self, context: _FullContext, /) -> None:
diff --git a/narwhals/_dask/selectors.py b/narwhals/_dask/selectors.py
index 8c55b3e98d..bd42fc76c7 100644
--- a/narwhals/_dask/selectors.py
+++ b/narwhals/_dask/selectors.py
@@ -33,7 +33,6 @@ def _iter_columns(self, df: DaskLazyFrame) -> Iterator[dx.Series]:
 
     def _selector(
         self,
-        context: _FullContext,
         call: EvalSeries[DaskLazyFrame, dx.Series],  # pyright: ignore[reportInvalidTypeForm]
         evaluate_output_names: EvalNames[DaskLazyFrame],
         /,
@@ -44,8 +43,8 @@ def _selector(
             function_name="selector",
             evaluate_output_names=evaluate_output_names,
             alias_output_names=None,
-            backend_version=context._backend_version,
-            version=context._version,
+            backend_version=self._backend_version,
+            version=self._version,
         )
 
     def __init__(self: Self, context: _FullContext, /) -> None:
diff --git a/narwhals/_duckdb/selectors.py b/narwhals/_duckdb/selectors.py
index 732cc3c6ec..9e99f0e78f 100644
--- a/narwhals/_duckdb/selectors.py
+++ b/narwhals/_duckdb/selectors.py
@@ -28,7 +28,6 @@ def _iter_columns(self, df: DuckDBLazyFrame) -> Iterator[duckdb.Expression]:
 
     def _selector(
         self,
-        context: _FullContext,
         call: EvalSeries[DuckDBLazyFrame, duckdb.Expression],  # type: ignore[type-var]
         evaluate_output_names: EvalNames[DuckDBLazyFrame],
         /,
@@ -38,8 +37,8 @@ def _selector(
             function_name="selector",
             evaluate_output_names=evaluate_output_names,
             alias_output_names=None,
-            backend_version=context._backend_version,
-            version=context._version,
+            backend_version=self._backend_version,
+            version=self._version,
         )
 
     def __init__(self: Self, context: _FullContext, /) -> None:
diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py
index 43e7f10258..ed1947d530 100644
--- a/narwhals/_pandas_like/selectors.py
+++ b/narwhals/_pandas_like/selectors.py
@@ -37,7 +37,6 @@ def _iter_columns(self, df: PandasLikeDataFrame) -> Iterator[PandasLikeSeries]:
 
     def _selector(
         self,
-        context: _FullContext,
         call: EvalSeries[PandasLikeDataFrame, PandasLikeSeries],
         evaluate_output_names: EvalNames[PandasLikeDataFrame],
         /,
@@ -48,9 +47,9 @@ def _selector(
             function_name="selector",
             evaluate_output_names=evaluate_output_names,
             alias_output_names=None,
-            implementation=context._implementation,
-            backend_version=context._backend_version,
-            version=context._version,
+            implementation=self._implementation,
+            backend_version=self._backend_version,
+            version=self._version,
         )
 
     def __init__(self: Self, context: _FullContext, /) -> None:
diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index 53cc3b1689..bd6922559f 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -58,7 +58,6 @@ class CompliantSelectorNamespace(Generic[FrameT, SeriesT], Protocol):
 
     def _selector(
         self,
-        context: _FullContext,
         call: EvalSeries[FrameT, SeriesT],
         evaluate_output_names: EvalNames[FrameT],
         /,
@@ -88,7 +87,7 @@ def series(df: FrameT) -> Sequence[SeriesT]:
         def names(df: FrameT) -> Sequence[str]:
             return [name for name, tp in self._iter_schema(df) if isinstance(tp, dtype)]
 
-        return self._selector(self, series, names)
+        return self._selector(series, names)
 
     def by_dtype(
         self: Self, dtypes: Collection[DType | type[DType]]
@@ -99,7 +98,7 @@ def series(df: FrameT) -> Sequence[SeriesT]:
         def names(df: FrameT) -> Sequence[str]:
             return [name for name, tp in self._iter_schema(df) if tp in dtypes]
 
-        return self._selector(self, series, names)
+        return self._selector(series, names)
 
     def matches(self: Self, pattern: str) -> CompliantSelector[FrameT, SeriesT]:
         p = re.compile(pattern)
@@ -113,7 +112,7 @@ def series(df: FrameT) -> Sequence[SeriesT]:
         def names(df: FrameT) -> Sequence[str]:
             return [col for col in df.columns if p.search(col)]
 
-        return self._selector(self, series, names)
+        return self._selector(series, names)
 
     def numeric(self: Self) -> CompliantSelector[FrameT, SeriesT]:
         def series(df: FrameT) -> Sequence[SeriesT]:
@@ -122,7 +121,7 @@ def series(df: FrameT) -> Sequence[SeriesT]:
         def names(df: FrameT) -> Sequence[str]:
             return [name for name, tp in self._iter_schema(df) if tp.is_numeric()]
 
-        return self._selector(self, series, names)
+        return self._selector(series, names)
 
     def categorical(self: Self) -> CompliantSelector[FrameT, SeriesT]:
         return self._is_dtype(import_dtypes_module(self._version).Categorical)
@@ -137,7 +136,7 @@ def all(self: Self) -> CompliantSelector[FrameT, SeriesT]:
         def series(df: FrameT) -> Sequence[SeriesT]:
             return list(self._iter_columns(df))
 
-        return self._selector(self, series, get_column_names)
+        return self._selector(series, get_column_names)
 
     def datetime(
         self: Self,
@@ -158,7 +157,7 @@ def series(df: FrameT) -> Sequence[SeriesT]:
         def names(df: FrameT) -> Sequence[str]:
             return [name for name, tp in self._iter_schema(df) if matches(tp)]
 
-        return self._selector(self, series, names)
+        return self._selector(series, names)
 
     # NOTE: Can't reuse for `<3.11`
     # - https://github.com/python/cpython/issues/88970
@@ -210,7 +209,7 @@ def names(df: FrameT) -> Sequence[str]:
                 lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
                 return [x for x in lhs_names if x not in rhs_names]
 
-            return self.selectors._selector(self, series, names)
+            return self.selectors._selector(series, names)
         else:
             return self._to_expr() - other
 
@@ -234,7 +233,7 @@ def series(df: FrameT) -> Sequence[str]:
                 lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
                 return [*(x for x in lhs_names if x not in rhs_names), *rhs_names]
 
-            return self.selectors._selector(self, names, series)
+            return self.selectors._selector(names, series)
         else:
             return self._to_expr() | other
 
@@ -255,7 +254,7 @@ def names(df: FrameT) -> Sequence[str]:
                 lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
                 return [x for x in lhs_names if x in rhs_names]
 
-            return self.selectors._selector(self, series, names)
+            return self.selectors._selector(series, names)
         else:
             return self._to_expr() & other
 
diff --git a/narwhals/_spark_like/selectors.py b/narwhals/_spark_like/selectors.py
index 928a803b39..5c9e77a3d0 100644
--- a/narwhals/_spark_like/selectors.py
+++ b/narwhals/_spark_like/selectors.py
@@ -24,7 +24,6 @@ def _iter_columns(self, df: SparkLikeLazyFrame) -> Iterator[Column]:
 
     def _selector(
         self,
-        context: _FullContext,
         call: EvalSeries[SparkLikeLazyFrame, Column],
         evaluate_output_names: EvalNames[SparkLikeLazyFrame],
         /,
@@ -34,9 +33,9 @@ def _selector(
             function_name="selector",
             evaluate_output_names=evaluate_output_names,
             alias_output_names=None,
-            backend_version=context._backend_version,
-            version=context._version,
-            implementation=context._implementation,
+            backend_version=self._backend_version,
+            version=self._version,
+            implementation=self._implementation,
         )
 
     def __init__(self: Self, context: _FullContext, /) -> None:

From 13882a5fbf9676399ed531beb528a4f462c8aad9 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sun, 23 Feb 2025 11:25:27 +0000
Subject: [PATCH 33/55] refactor: remove `CompliantSeriesWithDType`

Resolves https://github.com/narwhals-dev/narwhals/pull/2064#discussion_r1966578116
---
 narwhals/_selectors.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index bd6922559f..c52f35efe5 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -39,12 +39,8 @@
     from narwhals.utils import Version
     from narwhals.utils import _FullContext
 
-    class CompliantSeriesWithDType(CompliantSeries, Protocol):
-        @property
-        def dtype(self) -> DType: ...
 
-
-SeriesT = TypeVar("SeriesT", bound="CompliantSeriesWithDType")
+SeriesT = TypeVar("SeriesT", bound="CompliantSeries")
 FrameT = TypeVar("FrameT", bound="CompliantDataFrame | CompliantLazyFrame")
 SelectorOrExpr: TypeAlias = "CompliantSelector[FrameT, SeriesT] | CompliantExpr[SeriesT]"
 EvalSeries: TypeAlias = Callable[[FrameT], Sequence[SeriesT]]

From 7f65b4b00055a4d7ab07ed391e8a0c40457c65b0 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sun, 23 Feb 2025 12:04:40 +0000
Subject: [PATCH 34/55] fix(typing): resolve some variance issues

`pyright` wanted the frame `TypeVar` to be *contravariant* because `CompliantExpr.__call__` was missing the `TypeVar` annotation on `df` (it was typed as `Any`)
---
 narwhals/_arrow/expr.py            |  2 +-
 narwhals/_arrow/namespace.py       |  2 +-
 narwhals/_dask/expr.py             |  2 +-
 narwhals/_dask/namespace.py        |  2 +-
 narwhals/_duckdb/expr.py           |  2 +-
 narwhals/_duckdb/namespace.py      |  2 +-
 narwhals/_expression_parsing.py    | 22 ++++++++++------------
 narwhals/_pandas_like/expr.py      |  2 +-
 narwhals/_pandas_like/namespace.py |  2 +-
 narwhals/_selectors.py             | 24 +++++++++++++++++-------
 narwhals/_spark_like/expr.py       |  2 +-
 narwhals/_spark_like/namespace.py  |  2 +-
 narwhals/typing.py                 | 25 ++++++++++---------------
 narwhals/utils.py                  |  6 +++---
 14 files changed, 50 insertions(+), 47 deletions(-)

diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py
index 8ae5833d9b..1509722fef 100644
--- a/narwhals/_arrow/expr.py
+++ b/narwhals/_arrow/expr.py
@@ -49,7 +49,7 @@ def __init__(
         self._depth = depth
         self._function_name = function_name
         self._depth = depth
-        self._evaluate_output_names = evaluate_output_names  # pyright: ignore[reportAttributeAccessIssue]
+        self._evaluate_output_names = evaluate_output_names
         self._alias_output_names = alias_output_names
         self._backend_version = backend_version
         self._version = version
diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py
index ecbec58fa3..ed1b0c383a 100644
--- a/narwhals/_arrow/namespace.py
+++ b/narwhals/_arrow/namespace.py
@@ -473,7 +473,7 @@ def __init__(
         self._call = call
         self._depth = depth
         self._function_name = function_name
-        self._evaluate_output_names = evaluate_output_names  # pyright: ignore[reportAttributeAccessIssue]
+        self._evaluate_output_names = evaluate_output_names
         self._alias_output_names = alias_output_names
         self._call_kwargs = call_kwargs or {}
 
diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py
index 5d765a3a9d..f0c6ce5250 100644
--- a/narwhals/_dask/expr.py
+++ b/narwhals/_dask/expr.py
@@ -55,7 +55,7 @@ def __init__(
         self._call = call
         self._depth = depth
         self._function_name = function_name
-        self._evaluate_output_names = evaluate_output_names  # pyright: ignore[reportAttributeAccessIssue]
+        self._evaluate_output_names = evaluate_output_names
         self._alias_output_names = alias_output_names
         self._backend_version = backend_version
         self._version = version
diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py
index 9453d15b9d..b6a179ea7c 100644
--- a/narwhals/_dask/namespace.py
+++ b/narwhals/_dask/namespace.py
@@ -412,7 +412,7 @@ def __init__(
         self._call = call
         self._depth = depth
         self._function_name = function_name
-        self._evaluate_output_names = evaluate_output_names  # pyright: ignore[reportAttributeAccessIssue]
+        self._evaluate_output_names = evaluate_output_names
         self._alias_output_names = alias_output_names
         self._call_kwargs = call_kwargs or {}
 
diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py
index 45f614489a..b098b7be65 100644
--- a/narwhals/_duckdb/expr.py
+++ b/narwhals/_duckdb/expr.py
@@ -49,7 +49,7 @@ def __init__(
     ) -> None:
         self._call = call
         self._function_name = function_name
-        self._evaluate_output_names = evaluate_output_names  # pyright: ignore[reportAttributeAccessIssue]
+        self._evaluate_output_names = evaluate_output_names
         self._alias_output_names = alias_output_names
         self._backend_version = backend_version
         self._version = version
diff --git a/narwhals/_duckdb/namespace.py b/narwhals/_duckdb/namespace.py
index 39e39242ca..11fbd89384 100644
--- a/narwhals/_duckdb/namespace.py
+++ b/narwhals/_duckdb/namespace.py
@@ -336,7 +336,7 @@ def __init__(
         self._version = version
         self._call = call
         self._function_name = function_name
-        self._evaluate_output_names = evaluate_output_names  # pyright: ignore[reportAttributeAccessIssue]
+        self._evaluate_output_names = evaluate_output_names
         self._alias_output_names = alias_output_names
 
     def otherwise(self: Self, value: DuckDBExpr | Any) -> DuckDBExpr:
diff --git a/narwhals/_expression_parsing.py b/narwhals/_expression_parsing.py
index c3862b3a0f..32ab404dfb 100644
--- a/narwhals/_expression_parsing.py
+++ b/narwhals/_expression_parsing.py
@@ -29,7 +29,7 @@
     from narwhals.expr import Expr
     from narwhals.typing import CompliantDataFrame
     from narwhals.typing import CompliantExpr
-    from narwhals.typing import CompliantFrameT_contra
+    from narwhals.typing import CompliantFrameT
     from narwhals.typing import CompliantLazyFrame
     from narwhals.typing import CompliantNamespace
     from narwhals.typing import CompliantSeries
@@ -51,8 +51,7 @@ def is_expr(obj: Any) -> TypeIs[Expr]:
 
 
 def evaluate_into_expr(
-    df: CompliantFrameT_contra,
-    expr: CompliantExpr[CompliantFrameT_contra, CompliantSeriesT_co],
+    df: CompliantFrameT, expr: CompliantExpr[CompliantFrameT, CompliantSeriesT_co]
 ) -> Sequence[CompliantSeriesT_co]:
     """Return list of raw columns.
 
@@ -72,9 +71,9 @@ def evaluate_into_expr(
 
 
 def evaluate_into_exprs(
-    df: CompliantFrameT_contra,
+    df: CompliantFrameT,
     /,
-    *exprs: CompliantExpr[CompliantFrameT_contra, CompliantSeriesT_co],
+    *exprs: CompliantExpr[CompliantFrameT, CompliantSeriesT_co],
 ) -> list[CompliantSeriesT_co]:
     """Evaluate each expr into Series."""
     return [
@@ -86,8 +85,7 @@ def evaluate_into_exprs(
 
 @overload
 def maybe_evaluate_expr(
-    df: CompliantFrameT_contra,
-    expr: CompliantExpr[CompliantFrameT_contra, CompliantSeriesT_co],
+    df: CompliantFrameT, expr: CompliantExpr[CompliantFrameT, CompliantSeriesT_co]
 ) -> CompliantSeriesT_co: ...
 
 
@@ -257,15 +255,15 @@ def is_simple_aggregation(expr: CompliantExpr[Any, Any]) -> bool:
 
 
 def combine_evaluate_output_names(
-    *exprs: CompliantExpr[CompliantFrameT_contra, Any],
-) -> Callable[[CompliantFrameT_contra], Sequence[str]]:
+    *exprs: CompliantExpr[CompliantFrameT, Any],
+) -> Callable[[CompliantFrameT], Sequence[str]]:
     # Follow left-hand-rule for naming. E.g. `nw.sum_horizontal(expr1, expr2)` takes the
     # first name of `expr1`.
     if not is_compliant_expr(exprs[0]):  # pragma: no cover
         msg = f"Safety assertion failed, expected expression, got: {type(exprs[0])}. Please report a bug."
         raise AssertionError(msg)
 
-    def evaluate_output_names(df: CompliantFrameT_contra) -> Sequence[str]:
+    def evaluate_output_names(df: CompliantFrameT) -> Sequence[str]:
         return exprs[0]._evaluate_output_names(df)[:1]
 
     return evaluate_output_names
@@ -286,11 +284,11 @@ def alias_output_names(names: Sequence[str]) -> Sequence[str]:
 
 
 def extract_compliant(
-    plx: CompliantNamespace[CompliantFrameT_contra, CompliantSeriesT_co],
+    plx: CompliantNamespace[CompliantFrameT, CompliantSeriesT_co],
     other: Any,
     *,
     str_as_lit: bool,
-) -> CompliantExpr[CompliantFrameT_contra, CompliantSeriesT_co] | object:
+) -> CompliantExpr[CompliantFrameT, CompliantSeriesT_co] | object:
     if is_expr(other):
         return other._to_compliant_expr(plx)
     if isinstance(other, str) and not str_as_lit:
diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py
index 20077af8a2..3cd875e8d6 100644
--- a/narwhals/_pandas_like/expr.py
+++ b/narwhals/_pandas_like/expr.py
@@ -63,7 +63,7 @@ def __init__(
         self._call = call
         self._depth = depth
         self._function_name = function_name
-        self._evaluate_output_names = evaluate_output_names  # pyright: ignore[reportAttributeAccessIssue]
+        self._evaluate_output_names = evaluate_output_names
         self._alias_output_names = alias_output_names
         self._implementation = implementation
         self._backend_version = backend_version
diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py
index 3aa74fdd8f..206d923969 100644
--- a/narwhals/_pandas_like/namespace.py
+++ b/narwhals/_pandas_like/namespace.py
@@ -496,7 +496,7 @@ def __init__(
         self._call = call
         self._depth = depth
         self._function_name = function_name
-        self._evaluate_output_names = evaluate_output_names  # pyright: ignore[reportAttributeAccessIssue]
+        self._evaluate_output_names = evaluate_output_names
         self._alias_output_names = alias_output_names
         self._call_kwargs = call_kwargs or {}
 
diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index c52f35efe5..68342bf95d 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -42,7 +42,9 @@
 
 SeriesT = TypeVar("SeriesT", bound="CompliantSeries")
 FrameT = TypeVar("FrameT", bound="CompliantDataFrame | CompliantLazyFrame")
-SelectorOrExpr: TypeAlias = "CompliantSelector[FrameT, SeriesT] | CompliantExpr[SeriesT]"
+SelectorOrExpr: TypeAlias = (
+    "CompliantSelector[FrameT, SeriesT] | CompliantExpr[FrameT, SeriesT]"
+)
 EvalSeries: TypeAlias = Callable[[FrameT], Sequence[SeriesT]]
 EvalNames: TypeAlias = Callable[[FrameT], Sequence[str]]
 
@@ -174,22 +176,26 @@ def _iter_columns_dtypes(self, df: FrameT, /) -> Iterator[tuple[SeriesT, DType]]
         yield from zip(self._iter_columns(df), df.schema.values())
 
 
-class CompliantSelector(CompliantExpr[SeriesT], Generic[FrameT, SeriesT], Protocol):
+class CompliantSelector(
+    CompliantExpr[FrameT, SeriesT], Generic[FrameT, SeriesT], Protocol
+):
     @property
     def selectors(self) -> CompliantSelectorNamespace[FrameT, SeriesT]:
         return self.__narwhals_namespace__().selectors
 
-    def _to_expr(self: Self) -> CompliantExpr[SeriesT]: ...
+    def _to_expr(self: Self) -> CompliantExpr[FrameT, SeriesT]: ...
 
     def _is_selector(
-        self: Self, other: Self | CompliantExpr[SeriesT]
+        self: Self, other: Self | CompliantExpr[FrameT, SeriesT]
     ) -> TypeIs[CompliantSelector[FrameT, SeriesT]]:
         return isinstance(other, type(self))
 
     @overload
     def __sub__(self: Self, other: Self) -> Self: ...
     @overload
-    def __sub__(self: Self, other: CompliantExpr[SeriesT]) -> CompliantExpr[SeriesT]: ...
+    def __sub__(
+        self: Self, other: CompliantExpr[FrameT, SeriesT]
+    ) -> CompliantExpr[FrameT, SeriesT]: ...
     def __sub__(
         self: Self, other: SelectorOrExpr[FrameT, SeriesT]
     ) -> SelectorOrExpr[FrameT, SeriesT]:
@@ -212,7 +218,9 @@ def names(df: FrameT) -> Sequence[str]:
     @overload
     def __or__(self: Self, other: Self) -> Self: ...
     @overload
-    def __or__(self: Self, other: CompliantExpr[SeriesT]) -> CompliantExpr[SeriesT]: ...
+    def __or__(
+        self: Self, other: CompliantExpr[FrameT, SeriesT]
+    ) -> CompliantExpr[FrameT, SeriesT]: ...
     def __or__(
         self: Self, other: SelectorOrExpr[FrameT, SeriesT]
     ) -> SelectorOrExpr[FrameT, SeriesT]:
@@ -236,7 +244,9 @@ def series(df: FrameT) -> Sequence[str]:
     @overload
     def __and__(self: Self, other: Self) -> Self: ...
     @overload
-    def __and__(self: Self, other: CompliantExpr[SeriesT]) -> CompliantExpr[SeriesT]: ...
+    def __and__(
+        self: Self, other: CompliantExpr[FrameT, SeriesT]
+    ) -> CompliantExpr[FrameT, SeriesT]: ...
     def __and__(
         self: Self, other: SelectorOrExpr[FrameT, SeriesT]
     ) -> SelectorOrExpr[FrameT, SeriesT]:
diff --git a/narwhals/_spark_like/expr.py b/narwhals/_spark_like/expr.py
index 599b96059a..57daaf517d 100644
--- a/narwhals/_spark_like/expr.py
+++ b/narwhals/_spark_like/expr.py
@@ -43,7 +43,7 @@ def __init__(
     ) -> None:
         self._call = call
         self._function_name = function_name
-        self._evaluate_output_names = evaluate_output_names  # pyright: ignore[reportAttributeAccessIssue]
+        self._evaluate_output_names = evaluate_output_names
         self._alias_output_names = alias_output_names
         self._backend_version = backend_version
         self._version = version
diff --git a/narwhals/_spark_like/namespace.py b/narwhals/_spark_like/namespace.py
index 7a21701599..74d0841431 100644
--- a/narwhals/_spark_like/namespace.py
+++ b/narwhals/_spark_like/namespace.py
@@ -380,7 +380,7 @@ def __init__(
         self._version = version
         self._call = call
         self._function_name = function_name
-        self._evaluate_output_names = evaluate_output_names  # pyright: ignore[reportAttributeAccessIssue]
+        self._evaluate_output_names = evaluate_output_names
         self._alias_output_names = alias_output_names
         self._implementation = implementation
 
diff --git a/narwhals/typing.py b/narwhals/typing.py
index aa9243d479..9c4cb8bea4 100644
--- a/narwhals/typing.py
+++ b/narwhals/typing.py
@@ -92,30 +92,25 @@ def columns(self) -> Sequence[str]: ...
     def schema(self) -> Mapping[str, DType]: ...
 
 
-CompliantFrameT_contra = TypeVar(
-    "CompliantFrameT_contra",
-    bound="CompliantDataFrame | CompliantLazyFrame",
-    contravariant=True,
-)
-CompliantSeriesT_co = TypeVar(
-    "CompliantSeriesT_co", bound=CompliantSeries, covariant=True
+CompliantFrameT = TypeVar(
+    "CompliantFrameT", bound="CompliantDataFrame | CompliantLazyFrame"
 )
 
 
-class CompliantExpr(Protocol, Generic[CompliantFrameT_contra, CompliantSeriesT_co]):
+class CompliantExpr(Protocol, Generic[CompliantFrameT, CompliantSeriesT_co]):
     _implementation: Implementation
     _backend_version: tuple[int, ...]
     _version: Version
-    _evaluate_output_names: Callable[[CompliantFrameT_contra], Sequence[str]]
+    _evaluate_output_names: Callable[[CompliantFrameT], Sequence[str]]
     _alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None
     _depth: int
     _function_name: str
 
-    def __call__(self, df: Any) -> Sequence[CompliantSeriesT_co]: ...
+    def __call__(self, df: CompliantFrameT) -> Sequence[CompliantSeriesT_co]: ...
     def __narwhals_expr__(self) -> None: ...
     def __narwhals_namespace__(
         self,
-    ) -> CompliantNamespace[CompliantFrameT_contra, CompliantSeriesT_co]: ...
+    ) -> CompliantNamespace[CompliantFrameT, CompliantSeriesT_co]: ...
     def is_null(self) -> Self: ...
     def alias(self, name: str) -> Self: ...
     def cast(self, dtype: DType) -> Self: ...
@@ -137,13 +132,13 @@ def broadcast(
     ) -> Self: ...
 
 
-class CompliantNamespace(Protocol, Generic[CompliantFrameT_contra, CompliantSeriesT_co]):
+class CompliantNamespace(Protocol, Generic[CompliantFrameT, CompliantSeriesT_co]):
     def col(
         self, *column_names: str
-    ) -> CompliantExpr[CompliantFrameT_contra, CompliantSeriesT_co]: ...
+    ) -> CompliantExpr[CompliantFrameT, CompliantSeriesT_co]: ...
     def lit(
         self, value: Any, dtype: DType | None
-    ) -> CompliantExpr[CompliantFrameT_contra, CompliantSeriesT_co]: ...
+    ) -> CompliantExpr[CompliantFrameT, CompliantSeriesT_co]: ...
     @property
     def selectors(self) -> CompliantSelectorNamespace[Any, Any]: ...
 
@@ -345,7 +340,7 @@ class DTypes:
     # This one needs to be in TYPE_CHECKING to pass on 3.9,
     # and can only be defined after CompliantExpr has been defined
     IntoCompliantExpr: TypeAlias = (
-        CompliantExpr[CompliantFrameT_contra, CompliantSeriesT_co] | CompliantSeriesT_co
+        CompliantExpr[CompliantFrameT, CompliantSeriesT_co] | CompliantSeriesT_co
     )
 
 
diff --git a/narwhals/utils.py b/narwhals/utils.py
index 6338fa3ebc..e4d23f5b8a 100644
--- a/narwhals/utils.py
+++ b/narwhals/utils.py
@@ -55,7 +55,7 @@
     from narwhals.series import Series
     from narwhals.typing import CompliantDataFrame
     from narwhals.typing import CompliantExpr
-    from narwhals.typing import CompliantFrameT_contra
+    from narwhals.typing import CompliantFrameT
     from narwhals.typing import CompliantLazyFrame
     from narwhals.typing import CompliantSeries
     from narwhals.typing import CompliantSeriesT_co
@@ -1333,8 +1333,8 @@ def is_compliant_series(obj: Any) -> TypeIs[CompliantSeries]:
 
 
 def is_compliant_expr(
-    obj: CompliantExpr[CompliantFrameT_contra, CompliantSeriesT_co] | Any,
-) -> TypeIs[CompliantExpr[CompliantFrameT_contra, CompliantSeriesT_co]]:
+    obj: CompliantExpr[CompliantFrameT, CompliantSeriesT_co] | Any,
+) -> TypeIs[CompliantExpr[CompliantFrameT, CompliantSeriesT_co]]:
     return hasattr(obj, "__narwhals_expr__")
 
 

From c11dc9566c5097a3be4675a36565128aef0986f3 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sun, 23 Feb 2025 13:17:20 +0000
Subject: [PATCH 35/55] fix(typing): resolve more `mypy` errors

https://github.com/narwhals-dev/narwhals/actions/runs/13483136123/job/37670892891?pr=2064
---
 narwhals/_arrow/dataframe.py       | 22 ++++++++++++----------
 narwhals/_pandas_like/dataframe.py | 18 +++++++++++-------
 2 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py
index 1d7e3d1e53..98ba71810d 100644
--- a/narwhals/_arrow/dataframe.py
+++ b/narwhals/_arrow/dataframe.py
@@ -343,24 +343,24 @@ def simple_select(self, *column_names: str) -> Self:
             self._native_frame.select(list(column_names)), validate_column_names=False
         )
 
-    def aggregate(self: Self, *exprs: ArrowExpr) -> Self:
+    def aggregate(self: ArrowDataFrame, *exprs: ArrowExpr) -> ArrowDataFrame:
         return self.select(*exprs)
 
-    def select(self: Self, *exprs: ArrowExpr) -> Self:
-        new_series: Sequence[ArrowSeries] = evaluate_into_exprs(self, *exprs)
+    def select(self: ArrowDataFrame, *exprs: ArrowExpr) -> ArrowDataFrame:
+        new_series = evaluate_into_exprs(self, *exprs)
         if not new_series:
             # return empty dataframe, like Polars does
             return self._from_native_frame(
                 self._native_frame.__class__.from_arrays([]), validate_column_names=False
             )
         names = [s.name for s in new_series]
-        new_series = align_series_full_broadcast(*new_series)
-        df = pa.Table.from_arrays([s._native_series for s in new_series], names=names)
+        reshaped = align_series_full_broadcast(*new_series)
+        df = pa.Table.from_arrays([s._native_series for s in reshaped], names=names)
         return self._from_native_frame(df, validate_column_names=False)
 
-    def with_columns(self: Self, *exprs: ArrowExpr) -> Self:
+    def with_columns(self: ArrowDataFrame, *exprs: ArrowExpr) -> ArrowDataFrame:
         native_frame = self._native_frame
-        new_columns: list[ArrowSeries] = evaluate_into_exprs(self, *exprs)
+        new_columns = evaluate_into_exprs(self, *exprs)
 
         length = len(self)
         columns = self.columns
@@ -458,7 +458,7 @@ def drop(self: Self, columns: list[str], strict: bool) -> Self:  # noqa: FBT001
             self._native_frame.drop(to_drop), validate_column_names=False
         )
 
-    def drop_nulls(self: Self, subset: list[str] | None) -> Self:
+    def drop_nulls(self: Self, subset: list[str] | None) -> ArrowDataFrame:
         if subset is None:
             return self._from_native_frame(
                 self._native_frame.drop_null(), validate_column_names=False
@@ -540,7 +540,9 @@ def with_row_index(self: Self, name: str) -> Self:
             df.append_column(name, row_indices).select([name, *cols])
         )
 
-    def filter(self: Self, predicate: ArrowExpr | list[bool | None]) -> Self:
+    def filter(
+        self: ArrowDataFrame, predicate: ArrowExpr | list[bool | None]
+    ) -> ArrowDataFrame:
         if isinstance(predicate, list):
             mask_native: Mask | ArrowChunkedArray = predicate
         else:
@@ -737,7 +739,7 @@ def unique(
         *,
         keep: Literal["any", "first", "last", "none"],
         maintain_order: bool | None = None,
-    ) -> Self:
+    ) -> ArrowDataFrame:
         # The param `maintain_order` is only here for compatibility with the Polars API
         # and has no effect on the output.
         import numpy as np  # ignore-banned-import
diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py
index e235fe27ee..f29a34db7b 100644
--- a/narwhals/_pandas_like/dataframe.py
+++ b/narwhals/_pandas_like/dataframe.py
@@ -388,11 +388,11 @@ def simple_select(self: Self, *column_names: str) -> Self:
             validate_column_names=False,
         )
 
-    def aggregate(self: Self, *exprs: PandasLikeExpr) -> Self:
+    def aggregate(self: Self, *exprs: PandasLikeExpr) -> PandasLikeDataFrame:
         return self.select(*exprs)
 
-    def select(self: Self, *exprs: PandasLikeExpr) -> Self:
-        new_series: list[PandasLikeSeries] = evaluate_into_exprs(self, *exprs)
+    def select(self: PandasLikeDataFrame, *exprs: PandasLikeExpr) -> PandasLikeDataFrame:
+        new_series = evaluate_into_exprs(self, *exprs)
         if not new_series:
             # return empty dataframe, like Polars does
             return self._from_native_frame(
@@ -406,7 +406,7 @@ def select(self: Self, *exprs: PandasLikeExpr) -> Self:
         )
         return self._from_native_frame(df, validate_column_names=False)
 
-    def drop_nulls(self: Self, subset: list[str] | None) -> Self:
+    def drop_nulls(self: Self, subset: list[str] | None) -> PandasLikeDataFrame:
         if subset is None:
             return self._from_native_frame(
                 self._native_frame.dropna(axis=0), validate_column_names=False
@@ -437,7 +437,9 @@ def with_row_index(self: Self, name: str) -> Self:
     def row(self: Self, row: int) -> tuple[Any, ...]:
         return tuple(x for x in self._native_frame.iloc[row])
 
-    def filter(self: Self, predicate: PandasLikeExpr | list[bool]) -> Self:
+    def filter(
+        self: PandasLikeDataFrame, predicate: PandasLikeExpr | list[bool]
+    ) -> PandasLikeDataFrame:
         if isinstance(predicate, list):
             mask_native: pd.Series[Any] | list[bool] = predicate
         else:
@@ -449,9 +451,11 @@ def filter(self: Self, predicate: PandasLikeExpr | list[bool]) -> Self:
             self._native_frame.loc[mask_native], validate_column_names=False
         )
 
-    def with_columns(self: Self, *exprs: PandasLikeExpr) -> Self:
+    def with_columns(
+        self: PandasLikeDataFrame, *exprs: PandasLikeExpr
+    ) -> PandasLikeDataFrame:
         index = self._native_frame.index
-        new_columns: list[PandasLikeSeries] = evaluate_into_exprs(self, *exprs)
+        new_columns = evaluate_into_exprs(self, *exprs)
         if not new_columns and len(self) == 0:
             return self
 

From ef85eda6599d0a0fce261c35416983756fa456e5 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sun, 23 Feb 2025 13:40:09 +0000
Subject: [PATCH 36/55] fix(typing): resolve `when()` related issues

The `when()` condition/then/otherwise values were untyped before, but now `Expr.__call__` has the right typevar

https://github.com/narwhals-dev/narwhals/actions/runs/13483136123/job/37670892891?pr=2064
---
 narwhals/_arrow/namespace.py       | 15 +++++++--------
 narwhals/_dask/namespace.py        | 19 ++++++++-----------
 narwhals/_pandas_like/namespace.py | 15 +++++++--------
 3 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py
index ed1b0c383a..c15913f347 100644
--- a/narwhals/_arrow/namespace.py
+++ b/narwhals/_arrow/namespace.py
@@ -28,7 +28,6 @@
 from narwhals.utils import Implementation
 from narwhals.utils import get_column_names
 from narwhals.utils import import_dtypes_module
-from narwhals.utils import is_compliant_expr
 
 if TYPE_CHECKING:
     from typing import Callable
@@ -392,9 +391,9 @@ def __init__(
         version: Version,
     ) -> None:
         self._backend_version = backend_version
-        self._condition = condition
-        self._then_value = then_value
-        self._otherwise_value = otherwise_value
+        self._condition: ArrowExpr = condition
+        self._then_value: ArrowExpr | Any = then_value
+        self._otherwise_value: ArrowExpr | Any = otherwise_value
         self._version = version
 
     def __call__(self: Self, df: ArrowDataFrame) -> Sequence[ArrowSeries]:
@@ -402,8 +401,8 @@ def __call__(self: Self, df: ArrowDataFrame) -> Sequence[ArrowSeries]:
         condition = self._condition(df)[0]
         condition_native = condition._native_series
 
-        if is_compliant_expr(self._then_value):
-            value_series: ArrowSeries = self._then_value(df)[0]
+        if isinstance(self._then_value, ArrowExpr):
+            value_series = self._then_value(df)[0]
         else:
             # `self._then_value` is a scalar
             value_series = plx._create_series_from_scalar(
@@ -421,8 +420,8 @@ def __call__(self: Self, df: ArrowDataFrame) -> Sequence[ArrowSeries]:
                     pc.if_else(condition_native, value_series_native, otherwise_null)
                 )
             ]
-        if is_compliant_expr(self._otherwise_value):
-            otherwise_series: ArrowSeries = self._otherwise_value(df)[0]
+        if isinstance(self._otherwise_value, ArrowExpr):
+            otherwise_series = self._otherwise_value(df)[0]
         else:
             # `self._otherwise_value` is a scalar
             otherwise_series = plx._create_series_from_scalar(
diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py
index b6a179ea7c..5cd828adfe 100644
--- a/narwhals/_dask/namespace.py
+++ b/narwhals/_dask/namespace.py
@@ -8,7 +8,6 @@
 from typing import Iterable
 from typing import Literal
 from typing import Sequence
-from typing import cast
 
 import dask.dataframe as dd
 import pandas as pd
@@ -26,7 +25,6 @@
 from narwhals.typing import CompliantNamespace
 from narwhals.utils import Implementation
 from narwhals.utils import get_column_names
-from narwhals.utils import is_compliant_expr
 
 if TYPE_CHECKING:
     from typing_extensions import Self
@@ -351,17 +349,16 @@ def __init__(
         version: Version,
     ) -> None:
         self._backend_version = backend_version
-        self._condition = condition
-        self._then_value = then_value
-        self._otherwise_value = otherwise_value
+        self._condition: DaskExpr = condition
+        self._then_value: DaskExpr | Any = then_value
+        self._otherwise_value: DaskExpr | Any = otherwise_value
         self._version = version
 
     def __call__(self: Self, df: DaskLazyFrame) -> Sequence[dx.Series]:
         condition = self._condition(df)[0]
-        condition = cast("dx.Series", condition)
 
-        if is_compliant_expr(self._then_value):
-            then_value: dx.Series | object = self._then_value(df)[0]
+        if isinstance(self._then_value, DaskExpr):
+            then_value = self._then_value(df)[0]
         else:
             then_value = self._then_value
         (then_series,) = align_series_full_broadcast(df, then_value)
@@ -370,13 +367,13 @@ def __call__(self: Self, df: DaskLazyFrame) -> Sequence[dx.Series]:
         if self._otherwise_value is None:
             return [then_series.where(condition)]
 
-        if is_compliant_expr(self._otherwise_value):
-            otherwise_value: dx.Series | object = self._otherwise_value(df)[0]
+        if isinstance(self._otherwise_value, DaskExpr):
+            otherwise_value = self._otherwise_value(df)[0]
         else:
             otherwise_value = self._otherwise_value
         (otherwise_series,) = align_series_full_broadcast(df, otherwise_value)
         validate_comparand(condition, otherwise_series)
-        return [then_series.where(condition, otherwise_series)]
+        return [then_series.where(condition, otherwise_series)]  # pyright: ignore[reportArgumentType]
 
     def then(self: Self, value: DaskExpr | Any) -> DaskThen:
         self._then_value = value
diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py
index 206d923969..21ae784a66 100644
--- a/narwhals/_pandas_like/namespace.py
+++ b/narwhals/_pandas_like/namespace.py
@@ -24,7 +24,6 @@
 from narwhals.typing import CompliantNamespace
 from narwhals.utils import get_column_names
 from narwhals.utils import import_dtypes_module
-from narwhals.utils import is_compliant_expr
 
 if TYPE_CHECKING:
     from typing_extensions import Self
@@ -413,9 +412,9 @@ def __init__(
     ) -> None:
         self._implementation = implementation
         self._backend_version = backend_version
-        self._condition = condition
-        self._then_value = then_value
-        self._otherwise_value = otherwise_value
+        self._condition: PandasLikeExpr = condition
+        self._then_value: PandasLikeExpr | Any = then_value
+        self._otherwise_value: PandasLikeExpr | Any = otherwise_value
         self._version = version
 
     def __call__(self: Self, df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]:
@@ -423,8 +422,8 @@ def __call__(self: Self, df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]:
         condition = self._condition(df)[0]
         condition_native = condition._native_series
 
-        if is_compliant_expr(self._then_value):
-            value_series: PandasLikeSeries = self._then_value(df)[0]
+        if isinstance(self._then_value, PandasLikeExpr):
+            value_series = self._then_value(df)[0]
         else:
             # `self._then_value` is a scalar
             value_series = plx._create_series_from_scalar(
@@ -442,8 +441,8 @@ def __call__(self: Self, df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]:
                 )
             ]
 
-        if is_compliant_expr(self._otherwise_value):
-            otherwise_series: PandasLikeSeries = self._otherwise_value(df)[0]
+        if isinstance(self._otherwise_value, PandasLikeExpr):
+            otherwise_series = self._otherwise_value(df)[0]
         else:
             # `self._then_value` is a scalar
             otherwise_series = plx._create_series_from_scalar(

From 0301a4b9c9f902b558e9b6124c1647530a1174e5 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sun, 23 Feb 2025 13:46:05 +0000
Subject: [PATCH 37/55] chore(typing): Ignore all `_dask` warnings

**SPLIT INTO A PR**
- Only doing this here to reduce the noise while I work out the rest
---
 narwhals/_dask/expr.py      | 8 ++++----
 narwhals/_dask/expr_dt.py   | 6 +++---
 narwhals/_dask/group_by.py  | 2 +-
 narwhals/_dask/namespace.py | 2 +-
 narwhals/_dask/utils.py     | 6 +++---
 5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py
index f0c6ce5250..b19272d56e 100644
--- a/narwhals/_dask/expr.py
+++ b/narwhals/_dask/expr.py
@@ -35,7 +35,7 @@
     from narwhals.utils import Version
 
 
-class DaskExpr(CompliantExpr["DaskLazyFrame", "dx.Series"]):
+class DaskExpr(CompliantExpr["DaskLazyFrame", "dx.Series"]):  # pyright: ignore[reportInvalidTypeArguments] (#2044)
     _implementation: Implementation = Implementation.DASK
 
     def __init__(
@@ -454,7 +454,7 @@ def func(_input: dx.Series) -> dx.Series:
                 _input.dtype, self._version, self._implementation
             )
             if dtype.is_numeric():
-                return _input != _input  # noqa: PLR0124
+                return _input != _input  # pyright: ignore[reportReturnType] # noqa: PLR0124
             msg = f"`.is_nan` only supported for numeric dtypes and not {dtype}, did you mean `.is_null`?"
             raise InvalidOperationError(msg)
 
@@ -487,7 +487,7 @@ def is_first_distinct(self: Self) -> Self:
         def func(_input: dx.Series) -> dx.Series:
             _name = _input.name
             col_token = generate_temporary_column_name(n_bytes=8, columns=[_name])
-            _input = add_row_index(
+            _input = add_row_index(  # pyright: ignore[reportAssignmentType]
                 _input.to_frame(),
                 col_token,
                 backend_version=self._backend_version,
@@ -504,7 +504,7 @@ def is_last_distinct(self: Self) -> Self:
         def func(_input: dx.Series) -> dx.Series:
             _name = _input.name
             col_token = generate_temporary_column_name(n_bytes=8, columns=[_name])
-            _input = add_row_index(
+            _input = add_row_index(  # pyright: ignore[reportAssignmentType]
                 _input.to_frame(),
                 col_token,
                 backend_version=self._backend_version,
diff --git a/narwhals/_dask/expr_dt.py b/narwhals/_dask/expr_dt.py
index c569d7dc5b..b7355dad4b 100644
--- a/narwhals/_dask/expr_dt.py
+++ b/narwhals/_dask/expr_dt.py
@@ -96,9 +96,9 @@ def func(s: dx.Series, time_zone: str) -> dx.Series:
                 s.dtype, self._compliant_expr._version, Implementation.DASK
             )
             if dtype.time_zone is None:  # type: ignore[attr-defined]
-                return s.dt.tz_localize("UTC").dt.tz_convert(time_zone)
+                return s.dt.tz_localize("UTC").dt.tz_convert(time_zone)  # pyright: ignore[reportAttributeAccessIssue]
             else:
-                return s.dt.tz_convert(time_zone)
+                return s.dt.tz_convert(time_zone)  # pyright: ignore[reportAttributeAccessIssue]
 
         return self._compliant_expr._from_call(func, "tz_convert", time_zone=time_zone)
 
@@ -125,7 +125,7 @@ def func(s: dx.Series, time_unit: TimeUnit) -> dx.Series:
             else:
                 msg = "Input should be either of Date or Datetime type"
                 raise TypeError(msg)
-            return result.where(~mask_na)
+            return result.where(~mask_na)  # pyright: ignore[reportReturnType]
 
         return self._compliant_expr._from_call(func, "datetime", time_unit=time_unit)
 
diff --git a/narwhals/_dask/group_by.py b/narwhals/_dask/group_by.py
index 13b57796a2..7cae261407 100644
--- a/narwhals/_dask/group_by.py
+++ b/narwhals/_dask/group_by.py
@@ -107,7 +107,7 @@ def _from_native_frame(self: Self, df: DaskLazyFrame) -> DaskLazyFrame:
 def agg_dask(
     df: DaskLazyFrame,
     grouped: Any,
-    exprs: Sequence[CompliantExpr[DaskLazyFrame, dx.Series]],
+    exprs: Sequence[CompliantExpr[DaskLazyFrame, dx.Series]],  # pyright: ignore[reportInvalidTypeArguments]
     keys: list[str],
     from_dataframe: Callable[[Any], DaskLazyFrame],
 ) -> DaskLazyFrame:
diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py
index 5cd828adfe..cf46590086 100644
--- a/narwhals/_dask/namespace.py
+++ b/narwhals/_dask/namespace.py
@@ -38,7 +38,7 @@
         import dask_expr as dx
 
 
-class DaskNamespace(CompliantNamespace[DaskLazyFrame, "dx.Series"]):
+class DaskNamespace(CompliantNamespace[DaskLazyFrame, "dx.Series"]):  # pyright: ignore[reportInvalidTypeArguments]
     _implementation: Implementation = Implementation.DASK
 
     @property
diff --git a/narwhals/_dask/utils.py b/narwhals/_dask/utils.py
index 5c1c4c0742..a98241ecf2 100644
--- a/narwhals/_dask/utils.py
+++ b/narwhals/_dask/utils.py
@@ -62,7 +62,7 @@ def align_series_full_broadcast(
     return [
         s if isinstance(s, dx.Series) else df._native_frame.assign(_tmp=s)["_tmp"]
         for s in series
-    ]
+    ]  # pyright: ignore[reportReturnType]
 
 
 def add_row_index(
@@ -155,8 +155,8 @@ def narwhals_to_native_dtype(dtype: DType | type[DType], version: Version) -> An
 
 
 def name_preserving_sum(s1: dx.Series, s2: dx.Series) -> dx.Series:
-    return (s1 + s2).rename(s1.name)
+    return (s1 + s2).rename(s1.name)  # pyright: ignore[reportOperatorIssue]
 
 
 def name_preserving_div(s1: dx.Series, s2: dx.Series) -> dx.Series:
-    return (s1 / s2).rename(s1.name)
+    return (s1 / s2).rename(s1.name)  # pyright: ignore[reportOperatorIssue]

From 72014548b6f49b864164167c2372fa43778556ad Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Mon, 24 Feb 2025 12:24:00 +0000
Subject: [PATCH 38/55] chore(typing): resolve `mypy` strict errors

Following #2077
---
 narwhals/_arrow/dataframe.py      | 2 +-
 narwhals/_expression_parsing.py   | 6 +++---
 narwhals/_selectors.py            | 9 ++++++---
 narwhals/_spark_like/dataframe.py | 2 +-
 narwhals/typing.py                | 2 +-
 narwhals/utils.py                 | 6 ++++--
 6 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py
index da07078c34..d3af796227 100644
--- a/narwhals/_arrow/dataframe.py
+++ b/narwhals/_arrow/dataframe.py
@@ -618,7 +618,7 @@ def collect(
         self: Self,
         backend: Implementation | None,
         **kwargs: Any,
-    ) -> CompliantDataFrame:
+    ) -> CompliantDataFrame[Any]:
         if backend is Implementation.PYARROW or backend is None:
             from narwhals._arrow.dataframe import ArrowDataFrame
 
diff --git a/narwhals/_expression_parsing.py b/narwhals/_expression_parsing.py
index 89264da2d8..b6c68dbac7 100644
--- a/narwhals/_expression_parsing.py
+++ b/narwhals/_expression_parsing.py
@@ -90,7 +90,7 @@ def maybe_evaluate_expr(
 
 
 @overload
-def maybe_evaluate_expr(df: CompliantDataFrame, expr: T) -> T: ...
+def maybe_evaluate_expr(df: CompliantDataFrame[Any], expr: T) -> T: ...
 
 
 def maybe_evaluate_expr(
@@ -152,7 +152,7 @@ def reuse_series_implementation(
     """
     plx = expr.__narwhals_namespace__()
 
-    def func(df: CompliantDataFrame) -> Sequence[CompliantSeries]:
+    def func(df: CompliantDataFrame[Any]) -> Sequence[CompliantSeries]:
         _kwargs = {
             **(call_kwargs or {}),
             **{
@@ -303,7 +303,7 @@ def extract_compliant(
 
 def evaluate_output_names_and_aliases(
     expr: CompliantExpr[Any, Any],
-    df: CompliantDataFrame | CompliantLazyFrame,
+    df: CompliantDataFrame[Any] | CompliantLazyFrame,
     exclude: Sequence[str],
 ) -> tuple[Sequence[str], Sequence[str]]:
     output_names = expr._evaluate_output_names(df)
diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index 68342bf95d..4c1a138114 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -5,6 +5,7 @@
 import re
 from functools import partial
 from typing import TYPE_CHECKING
+from typing import Any
 from typing import Callable
 from typing import Collection
 from typing import Generic
@@ -41,7 +42,7 @@
 
 
 SeriesT = TypeVar("SeriesT", bound="CompliantSeries")
-FrameT = TypeVar("FrameT", bound="CompliantDataFrame | CompliantLazyFrame")
+FrameT = TypeVar("FrameT", bound="CompliantDataFrame[Any] | CompliantLazyFrame")
 SelectorOrExpr: TypeAlias = (
     "CompliantSelector[FrameT, SeriesT] | CompliantExpr[FrameT, SeriesT]"
 )
@@ -265,7 +266,7 @@ def names(df: FrameT) -> Sequence[str]:
             return self._to_expr() & other
 
     def __invert__(self: Self) -> CompliantSelector[FrameT, SeriesT]:
-        return self.selectors.all() - self
+        return self.selectors.all() - self  # type: ignore[no-any-return]
 
     def __repr__(self: Self) -> str:  # pragma: no cover
         s = f"depth={self._depth}, " if is_tracks_depth(self._implementation) else ""
@@ -273,6 +274,8 @@ def __repr__(self: Self) -> str:  # pragma: no cover
 
 
 def _eval_lhs_rhs(
-    df: CompliantDataFrame | CompliantLazyFrame, lhs: CompliantExpr, rhs: CompliantExpr
+    df: CompliantDataFrame[Any] | CompliantLazyFrame,
+    lhs: CompliantExpr[Any, Any],
+    rhs: CompliantExpr[Any, Any],
 ) -> tuple[Sequence[str], Sequence[str]]:
     return lhs._evaluate_output_names(df), rhs._evaluate_output_names(df)
diff --git a/narwhals/_spark_like/dataframe.py b/narwhals/_spark_like/dataframe.py
index 1739be09bb..cc2ac9bc87 100644
--- a/narwhals/_spark_like/dataframe.py
+++ b/narwhals/_spark_like/dataframe.py
@@ -163,7 +163,7 @@ def collect(
         self: Self,
         backend: ModuleType | Implementation | str | None,
         **kwargs: Any,
-    ) -> CompliantDataFrame:
+    ) -> CompliantDataFrame[Any]:
         if backend is Implementation.PANDAS:
             import pandas as pd  # ignore-banned-import
 
diff --git a/narwhals/typing.py b/narwhals/typing.py
index 9c4cb8bea4..8f83cce8ec 100644
--- a/narwhals/typing.py
+++ b/narwhals/typing.py
@@ -93,7 +93,7 @@ def schema(self) -> Mapping[str, DType]: ...
 
 
 CompliantFrameT = TypeVar(
-    "CompliantFrameT", bound="CompliantDataFrame | CompliantLazyFrame"
+    "CompliantFrameT", bound="CompliantDataFrame[Any] | CompliantLazyFrame"
 )
 
 
diff --git a/narwhals/utils.py b/narwhals/utils.py
index d24d74a5a9..5d2fe3e77d 100644
--- a/narwhals/utils.py
+++ b/narwhals/utils.py
@@ -1309,7 +1309,7 @@ def dtype_matches_time_unit_and_time_zone(
 
 
 def get_column_names(
-    df: NativeFrame | CompliantDataFrame | CompliantLazyFrame,
+    df: NativeFrame | CompliantDataFrame[Any] | CompliantLazyFrame,
 ) -> Sequence[str]:
     return df.columns
 
@@ -1319,7 +1319,9 @@ def _hasattr_static(obj: Any, attr: str) -> bool:
     return getattr_static(obj, attr, sentinel) is not sentinel
 
 
-def is_compliant_dataframe(obj: Any) -> TypeIs[CompliantDataFrame]:
+def is_compliant_dataframe(
+    obj: Any | CompliantDataFrame[CompliantSeriesT_co],
+) -> TypeIs[CompliantDataFrame[CompliantSeriesT_co]]:
     return _hasattr_static(obj, "__narwhals_dataframe__")
 
 

From a4edcb767a935492dfd5ab9eb9f6e8b2bdc35505 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Mon, 24 Feb 2025 12:42:33 +0000
Subject: [PATCH 39/55] fix(DRAFT): try giving `super(...)` hints for `3.8`

If this works, the errors in CI should only be for `ArrowSelector`
https://github.com/narwhals-dev/narwhals/pull/2064#discussion_r1965980715
---
 narwhals/_pandas_like/selectors.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py
index ed1947d530..c3e332dfb7 100644
--- a/narwhals/_pandas_like/selectors.py
+++ b/narwhals/_pandas_like/selectors.py
@@ -1,7 +1,9 @@
 from __future__ import annotations
 
+import sys
 from functools import partial
 from typing import TYPE_CHECKING
+from typing import Any
 from typing import Iterator
 
 from narwhals._pandas_like.dataframe import PandasLikeDataFrame
@@ -47,7 +49,7 @@ def _selector(
             function_name="selector",
             evaluate_output_names=evaluate_output_names,
             alias_output_names=None,
-            implementation=self._implementation,
+            implementation=self._implementation,  # AttributeError: 'PandasSelector' object has no attribute '_implementation'
             backend_version=self._backend_version,
             version=self._version,
         )
@@ -58,17 +60,24 @@ def __init__(self: Self, context: _FullContext, /) -> None:
         self._version = context._version
 
 
+# BUG: `3.8` Protocol?
+# https://github.com/narwhals-dev/narwhals/pull/2064#discussion_r1965980715
 class PandasSelector(  # type: ignore[misc]
     CompliantSelector["PandasLikeDataFrame", "PandasLikeSeries"], PandasLikeExpr
 ):
+    if sys.version_info < (3, 9):
+
+        def __init__(self, *args: Any, **kwds: Any) -> None:
+            super(PandasLikeExpr).__init__(*args, **kwds)
+
     def _to_expr(self: Self) -> PandasLikeExpr:
         return PandasLikeExpr(
-            self._call,
+            self._call,  # AttributeError: 'PandasSelector' object has no attribute '_call'
             depth=self._depth,
             function_name=self._function_name,
-            evaluate_output_names=self._evaluate_output_names,
+            evaluate_output_names=self._evaluate_output_names,  # AttributeError: 'PandasSelector' object has no attribute '_evaluate_output_names'
             alias_output_names=self._alias_output_names,
-            implementation=self._implementation,
+            implementation=self._implementation,  # AttributeError: 'PandasSelector' object has no attribute '_implementation'
             backend_version=self._backend_version,
             version=self._version,
         )

From cc00fdc076c4518ad5639c1288cc1c57e793a634 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Mon, 24 Feb 2025 12:47:02 +0000
Subject: [PATCH 40/55] don't forget `self`

https://github.com/narwhals-dev/narwhals/actions/runs/13498306238/job/37710441415?pr=2064#step:7:2647
---
 narwhals/_pandas_like/selectors.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py
index c3e332dfb7..d4eb8273cb 100644
--- a/narwhals/_pandas_like/selectors.py
+++ b/narwhals/_pandas_like/selectors.py
@@ -68,7 +68,7 @@ class PandasSelector(  # type: ignore[misc]
     if sys.version_info < (3, 9):
 
         def __init__(self, *args: Any, **kwds: Any) -> None:
-            super(PandasLikeExpr).__init__(*args, **kwds)
+            super(PandasLikeExpr, self).__init__(*args, **kwds)
 
     def _to_expr(self: Self) -> PandasLikeExpr:
         return PandasLikeExpr(

From b650c7817063c0c9fc65c78239fb9a37b7c7c3f6 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Mon, 24 Feb 2025 13:10:21 +0000
Subject: [PATCH 41/55] fix(DRAFT): try removing
 `CompliantSelectorNamespace.__init__`

Aiming to indirectly fix https://github.com/narwhals-dev/narwhals/pull/2064#discussion_r1965980715
---
 narwhals/_selectors.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index 4c1a138114..a1f131c4e2 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -38,7 +38,6 @@
     from narwhals.typing import TimeUnit
     from narwhals.utils import Implementation
     from narwhals.utils import Version
-    from narwhals.utils import _FullContext
 
 
 SeriesT = TypeVar("SeriesT", bound="CompliantSeries")
@@ -158,14 +157,6 @@ def names(df: FrameT) -> Sequence[str]:
 
         return self._selector(series, names)
 
-    # NOTE: Can't reuse for `<3.11`
-    # - https://github.com/python/cpython/issues/88970
-    # - https://github.com/python/cpython/pull/31628
-    def __init__(self: Self, context: _FullContext, /) -> None:  # pragma: no cover
-        self._implementation = context._implementation
-        self._backend_version = context._backend_version
-        self._version = context._version
-
 
 class LazySelectorNamespace(
     CompliantSelectorNamespace[FrameT, SeriesT], Generic[FrameT, SeriesT], Protocol

From acd8cbf0d1fc33af096fd69e795b214f2353ec15 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Mon, 24 Feb 2025 13:17:11 +0000
Subject: [PATCH 42/55] revert (72014548b6f49b864164167c2372fa43778556ad)

---
 narwhals/_pandas_like/selectors.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py
index d4eb8273cb..097016ffa8 100644
--- a/narwhals/_pandas_like/selectors.py
+++ b/narwhals/_pandas_like/selectors.py
@@ -1,9 +1,7 @@
 from __future__ import annotations
 
-import sys
 from functools import partial
 from typing import TYPE_CHECKING
-from typing import Any
 from typing import Iterator
 
 from narwhals._pandas_like.dataframe import PandasLikeDataFrame
@@ -65,11 +63,6 @@ def __init__(self: Self, context: _FullContext, /) -> None:
 class PandasSelector(  # type: ignore[misc]
     CompliantSelector["PandasLikeDataFrame", "PandasLikeSeries"], PandasLikeExpr
 ):
-    if sys.version_info < (3, 9):
-
-        def __init__(self, *args: Any, **kwds: Any) -> None:
-            super(PandasLikeExpr, self).__init__(*args, **kwds)
-
     def _to_expr(self: Self) -> PandasLikeExpr:
         return PandasLikeExpr(
             self._call,  # AttributeError: 'PandasSelector' object has no attribute '_call'

From 3be58e607250beced85698f5e6d3ed197b082c9b Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Mon, 24 Feb 2025 13:22:29 +0000
Subject: [PATCH 43/55] refactor: try reusing `Protocol`'s `Generic`

- Didn't realise `3.8` had a `.__class_getitem__`
- Possibly can replace `Protocol` w/ `Generic` in a `3.8` compat block
  - Provided this change doesn't break anything else
---
 narwhals/_selectors.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index a1f131c4e2..d4a891574e 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -8,7 +8,6 @@
 from typing import Any
 from typing import Callable
 from typing import Collection
-from typing import Generic
 from typing import Iterable
 from typing import Iterator
 from typing import Protocol
@@ -49,7 +48,7 @@
 EvalNames: TypeAlias = Callable[[FrameT], Sequence[str]]
 
 
-class CompliantSelectorNamespace(Generic[FrameT, SeriesT], Protocol):
+class CompliantSelectorNamespace(Protocol[FrameT, SeriesT]):
     _implementation: Implementation
     _backend_version: tuple[int, ...]
     _version: Version
@@ -159,7 +158,7 @@ def names(df: FrameT) -> Sequence[str]:
 
 
 class LazySelectorNamespace(
-    CompliantSelectorNamespace[FrameT, SeriesT], Generic[FrameT, SeriesT], Protocol
+    CompliantSelectorNamespace[FrameT, SeriesT], Protocol[FrameT, SeriesT]
 ):
     def _iter_schema(self, df: FrameT) -> Iterator[tuple[str, DType]]:
         yield from df.schema.items()
@@ -168,9 +167,7 @@ def _iter_columns_dtypes(self, df: FrameT, /) -> Iterator[tuple[SeriesT, DType]]
         yield from zip(self._iter_columns(df), df.schema.values())
 
 
-class CompliantSelector(
-    CompliantExpr[FrameT, SeriesT], Generic[FrameT, SeriesT], Protocol
-):
+class CompliantSelector(CompliantExpr[FrameT, SeriesT], Protocol[FrameT, SeriesT]):
     @property
     def selectors(self) -> CompliantSelectorNamespace[FrameT, SeriesT]:
         return self.__narwhals_namespace__().selectors

From d57c877aea98bc18677437aa1f8eebc22a1f7ca7 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Mon, 24 Feb 2025 13:34:28 +0000
Subject: [PATCH 44/55] refactor: try the same for `CompliantExpr`

- Related to https://github.com/narwhals-dev/narwhals/pull/2064#discussion_r1967646113
- Also in the bases of `CompliantSelector`
---
 narwhals/typing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/narwhals/typing.py b/narwhals/typing.py
index 8f83cce8ec..2a662484cf 100644
--- a/narwhals/typing.py
+++ b/narwhals/typing.py
@@ -97,7 +97,7 @@ def schema(self) -> Mapping[str, DType]: ...
 )
 
 
-class CompliantExpr(Protocol, Generic[CompliantFrameT, CompliantSeriesT_co]):
+class CompliantExpr(Protocol[CompliantFrameT, CompliantSeriesT_co]):
     _implementation: Implementation
     _backend_version: tuple[int, ...]
     _version: Version

From b192deb9863bd862a1dde359cb5492352a06ed4c Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Mon, 24 Feb 2025 13:45:59 +0000
Subject: [PATCH 45/55] fix(DRAFT): try removing `Protocol` altogether?

https://github.com/narwhals-dev/narwhals/pull/2064#discussion_r1967646113
---
 narwhals/_selectors.py | 13 ++++++++++++-
 narwhals/typing.py     | 13 ++++++++++++-
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index d4a891574e..53c040274c 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -10,7 +10,6 @@
 from typing import Collection
 from typing import Iterable
 from typing import Iterator
-from typing import Protocol
 from typing import Sequence
 from typing import TypeVar
 from typing import overload
@@ -23,6 +22,18 @@
 from narwhals.utils import is_compliant_dataframe
 from narwhals.utils import is_tracks_depth
 
+if not TYPE_CHECKING:
+    import sys
+
+    if sys.version_info >= (3, 9):
+        from typing import Protocol
+    else:
+        from typing import Generic
+
+        Protocol = Generic
+else:
+    from typing import Protocol
+
 if TYPE_CHECKING:
     from datetime import timezone
 
diff --git a/narwhals/typing.py b/narwhals/typing.py
index 2a662484cf..be2f2f1831 100644
--- a/narwhals/typing.py
+++ b/narwhals/typing.py
@@ -5,11 +5,22 @@
 from typing import Callable
 from typing import Generic
 from typing import Literal
-from typing import Protocol
 from typing import Sequence
 from typing import TypeVar
 from typing import Union
 
+if not TYPE_CHECKING:
+    import sys
+
+    if sys.version_info >= (3, 9):
+        from typing import Protocol
+    else:
+        from typing import Generic
+
+        Protocol = Generic
+else:
+    from typing import Protocol
+
 if TYPE_CHECKING:
     from types import ModuleType
     from typing import Mapping

From 75c5a81d8fd6c7ae9d11059309fc7957931f677e Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Mon, 24 Feb 2025 13:50:45 +0000
Subject: [PATCH 46/55] fix: only replace `Protocol` for `CompliantExpr`

https://github.com/narwhals-dev/narwhals/actions/runs/13499519966/job/37714261755?pr=2064
---
 narwhals/typing.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/narwhals/typing.py b/narwhals/typing.py
index be2f2f1831..1efc12d926 100644
--- a/narwhals/typing.py
+++ b/narwhals/typing.py
@@ -5,6 +5,7 @@
 from typing import Callable
 from typing import Generic
 from typing import Literal
+from typing import Protocol
 from typing import Sequence
 from typing import TypeVar
 from typing import Union
@@ -13,13 +14,11 @@
     import sys
 
     if sys.version_info >= (3, 9):
-        from typing import Protocol
+        from typing import Protocol as Protocol38
     else:
-        from typing import Generic
-
-        Protocol = Generic
+        from typing import Generic as Protocol38
 else:
-    from typing import Protocol
+    from typing import Protocol as Protocol38
 
 if TYPE_CHECKING:
     from types import ModuleType
@@ -108,7 +107,7 @@ def schema(self) -> Mapping[str, DType]: ...
 )
 
 
-class CompliantExpr(Protocol[CompliantFrameT, CompliantSeriesT_co]):
+class CompliantExpr(Protocol38[CompliantFrameT, CompliantSeriesT_co]):
     _implementation: Implementation
     _backend_version: tuple[int, ...]
     _version: Version

From 372f1eb17ff05457b7b8fb9caa1f46da1ae3cd7d Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Mon, 24 Feb 2025 13:59:38 +0000
Subject: [PATCH 47/55] ignore coverage, remove comments

https://github.com/narwhals-dev/narwhals/actions/runs/13499608446/job/37714553903?pr=2064
---
 narwhals/_pandas_like/selectors.py | 10 ++++------
 narwhals/_selectors.py             |  4 ++--
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py
index 097016ffa8..ed1947d530 100644
--- a/narwhals/_pandas_like/selectors.py
+++ b/narwhals/_pandas_like/selectors.py
@@ -47,7 +47,7 @@ def _selector(
             function_name="selector",
             evaluate_output_names=evaluate_output_names,
             alias_output_names=None,
-            implementation=self._implementation,  # AttributeError: 'PandasSelector' object has no attribute '_implementation'
+            implementation=self._implementation,
             backend_version=self._backend_version,
             version=self._version,
         )
@@ -58,19 +58,17 @@ def __init__(self: Self, context: _FullContext, /) -> None:
         self._version = context._version
 
 
-# BUG: `3.8` Protocol?
-# https://github.com/narwhals-dev/narwhals/pull/2064#discussion_r1965980715
 class PandasSelector(  # type: ignore[misc]
     CompliantSelector["PandasLikeDataFrame", "PandasLikeSeries"], PandasLikeExpr
 ):
     def _to_expr(self: Self) -> PandasLikeExpr:
         return PandasLikeExpr(
-            self._call,  # AttributeError: 'PandasSelector' object has no attribute '_call'
+            self._call,
             depth=self._depth,
             function_name=self._function_name,
-            evaluate_output_names=self._evaluate_output_names,  # AttributeError: 'PandasSelector' object has no attribute '_evaluate_output_names'
+            evaluate_output_names=self._evaluate_output_names,
             alias_output_names=self._alias_output_names,
-            implementation=self._implementation,  # AttributeError: 'PandasSelector' object has no attribute '_implementation'
+            implementation=self._implementation,
             backend_version=self._backend_version,
             version=self._version,
         )
diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index 53c040274c..09632f1ecc 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -22,7 +22,7 @@
 from narwhals.utils import is_compliant_dataframe
 from narwhals.utils import is_tracks_depth
 
-if not TYPE_CHECKING:
+if not TYPE_CHECKING:  # pragma: no cover
     import sys
 
     if sys.version_info >= (3, 9):
@@ -31,7 +31,7 @@
         from typing import Generic
 
         Protocol = Generic
-else:
+else:  # pragma: no cover
     from typing import Protocol
 
 if TYPE_CHECKING:

From c26d7ff4809d642f709f5920dd664aa1d07f34ca Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sat, 1 Mar 2025 18:47:07 +0000
Subject: [PATCH 48/55] chore(typing): ignore spark issues

Revisit in #2044
---
 narwhals/_spark_like/selectors.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/narwhals/_spark_like/selectors.py b/narwhals/_spark_like/selectors.py
index 5c9e77a3d0..116d8c8bd5 100644
--- a/narwhals/_spark_like/selectors.py
+++ b/narwhals/_spark_like/selectors.py
@@ -17,14 +17,14 @@
     from narwhals.utils import _FullContext
 
 
-class SparkLikeSelectorNamespace(LazySelectorNamespace["SparkLikeLazyFrame", "Column"]):
+class SparkLikeSelectorNamespace(LazySelectorNamespace["SparkLikeLazyFrame", "Column"]):  # type: ignore[type-var] (#2044)
     def _iter_columns(self, df: SparkLikeLazyFrame) -> Iterator[Column]:
         for col in df.columns:
             yield df._F.col(col)
 
     def _selector(
         self,
-        call: EvalSeries[SparkLikeLazyFrame, Column],
+        call: EvalSeries[SparkLikeLazyFrame, Column],  # type: ignore[type-var] (#2044)
         evaluate_output_names: EvalNames[SparkLikeLazyFrame],
         /,
     ) -> SparkLikeSelector:
@@ -44,7 +44,7 @@ def __init__(self: Self, context: _FullContext, /) -> None:
         self._implementation = context._implementation
 
 
-class SparkLikeSelector(CompliantSelector["SparkLikeLazyFrame", "Column"], SparkLikeExpr):  # type: ignore[misc]
+class SparkLikeSelector(CompliantSelector["SparkLikeLazyFrame", "Column"], SparkLikeExpr):  # type: ignore[type-var, misc] (#2044)
     def _to_expr(self: Self) -> SparkLikeExpr:
         return SparkLikeExpr(
             self._call,

From b4f6a5134ac93adf199fadd30efb6de79a353bfa Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sat, 1 Mar 2025 18:51:53 +0000
Subject: [PATCH 49/55] chore(typing): move (#2044) note out of `type: ignore` comments

https://github.com/narwhals-dev/narwhals/actions/runs/13607778818/job/38041286499?pr=2064
---
 narwhals/_spark_like/selectors.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/narwhals/_spark_like/selectors.py b/narwhals/_spark_like/selectors.py
index 116d8c8bd5..d29cbf82ed 100644
--- a/narwhals/_spark_like/selectors.py
+++ b/narwhals/_spark_like/selectors.py
@@ -17,14 +17,15 @@
     from narwhals.utils import _FullContext
 
 
-class SparkLikeSelectorNamespace(LazySelectorNamespace["SparkLikeLazyFrame", "Column"]):  # type: ignore[type-var] (#2044)
+# NOTE: See issue regarding ignores (#2044)
+class SparkLikeSelectorNamespace(LazySelectorNamespace["SparkLikeLazyFrame", "Column"]):  # type: ignore[type-var]
     def _iter_columns(self, df: SparkLikeLazyFrame) -> Iterator[Column]:
         for col in df.columns:
             yield df._F.col(col)
 
     def _selector(
         self,
-        call: EvalSeries[SparkLikeLazyFrame, Column],  # type: ignore[type-var] (#2044)
+        call: EvalSeries[SparkLikeLazyFrame, Column],  # type: ignore[type-var]
         evaluate_output_names: EvalNames[SparkLikeLazyFrame],
         /,
     ) -> SparkLikeSelector:
@@ -44,7 +45,7 @@ def __init__(self: Self, context: _FullContext, /) -> None:
         self._implementation = context._implementation
 
 
-class SparkLikeSelector(CompliantSelector["SparkLikeLazyFrame", "Column"], SparkLikeExpr):  # type: ignore[type-var, misc] (#2044)
+class SparkLikeSelector(CompliantSelector["SparkLikeLazyFrame", "Column"], SparkLikeExpr):  # type: ignore[type-var, misc]
     def _to_expr(self: Self) -> SparkLikeExpr:
         return SparkLikeExpr(
             self._call,

From ec5dd0ca48415b7418034a3583ee6fc634172b7d Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sat, 1 Mar 2025 19:10:09 +0000
Subject: [PATCH 50/55] refactor: Reuse eager-backend `iter_columns`

Utilizing (#2115) and (#2104)
---
 narwhals/_arrow/selectors.py       | 13 ++-----------
 narwhals/_pandas_like/selectors.py | 18 ++----------------
 narwhals/_selectors.py             |  8 ++++++++
 narwhals/typing.py                 |  2 ++
 4 files changed, 14 insertions(+), 27 deletions(-)

diff --git a/narwhals/_arrow/selectors.py b/narwhals/_arrow/selectors.py
index 8cdc379b2d..d9c74be112 100644
--- a/narwhals/_arrow/selectors.py
+++ b/narwhals/_arrow/selectors.py
@@ -1,11 +1,10 @@
 from __future__ import annotations
 
 from typing import TYPE_CHECKING
-from typing import Iterator
 
 from narwhals._arrow.expr import ArrowExpr
 from narwhals._selectors import CompliantSelector
-from narwhals._selectors import CompliantSelectorNamespace
+from narwhals._selectors import EagerSelectorNamespace
 
 if TYPE_CHECKING:
     from typing_extensions import Self
@@ -17,15 +16,7 @@
     from narwhals.utils import _FullContext
 
 
-class ArrowSelectorNamespace(CompliantSelectorNamespace["ArrowDataFrame", "ArrowSeries"]):
-    def _iter_columns(self, df: ArrowDataFrame) -> Iterator[ArrowSeries]:
-        from narwhals._arrow.series import ArrowSeries
-
-        for col, ser in zip(df.columns, df._native_frame.itercolumns()):
-            yield ArrowSeries(
-                ser, name=col, backend_version=df._backend_version, version=df._version
-            )
-
+class ArrowSelectorNamespace(EagerSelectorNamespace["ArrowDataFrame", "ArrowSeries"]):
     def _selector(
         self,
         call: EvalSeries[ArrowDataFrame, ArrowSeries],
diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py
index ed1947d530..bdf5cf33cd 100644
--- a/narwhals/_pandas_like/selectors.py
+++ b/narwhals/_pandas_like/selectors.py
@@ -1,14 +1,12 @@
 from __future__ import annotations
 
-from functools import partial
 from typing import TYPE_CHECKING
-from typing import Iterator
 
 from narwhals._pandas_like.dataframe import PandasLikeDataFrame
 from narwhals._pandas_like.expr import PandasLikeExpr
 from narwhals._pandas_like.series import PandasLikeSeries
 from narwhals._selectors import CompliantSelector
-from narwhals._selectors import CompliantSelectorNamespace
+from narwhals._selectors import EagerSelectorNamespace
 
 if TYPE_CHECKING:
     from typing_extensions import Self
@@ -21,20 +19,8 @@
 
 
 class PandasSelectorNamespace(
-    CompliantSelectorNamespace["PandasLikeDataFrame", "PandasLikeSeries"]
+    EagerSelectorNamespace["PandasLikeDataFrame", "PandasLikeSeries"]
 ):
-    def _iter_columns(self, df: PandasLikeDataFrame) -> Iterator[PandasLikeSeries]:
-        from narwhals._pandas_like.series import PandasLikeSeries
-
-        series = partial(
-            PandasLikeSeries,
-            implementation=df._implementation,
-            backend_version=df._backend_version,
-            version=df._version,
-        )
-        for _col, ser in df._native_frame.items():  # noqa: PERF102
-            yield series(ser)
-
     def _selector(
         self,
         call: EvalSeries[PandasLikeDataFrame, PandasLikeSeries],
diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index 09632f1ecc..9ef7b20c9c 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -52,6 +52,7 @@
 
 SeriesT = TypeVar("SeriesT", bound="CompliantSeries")
 FrameT = TypeVar("FrameT", bound="CompliantDataFrame[Any] | CompliantLazyFrame")
+DataFrameT = TypeVar("DataFrameT", bound="CompliantDataFrame[Any]")
 SelectorOrExpr: TypeAlias = (
     "CompliantSelector[FrameT, SeriesT] | CompliantExpr[FrameT, SeriesT]"
 )
@@ -168,6 +169,13 @@ def names(df: FrameT) -> Sequence[str]:
         return self._selector(series, names)
 
 
+class EagerSelectorNamespace(
+    CompliantSelectorNamespace[DataFrameT, SeriesT], Protocol[DataFrameT, SeriesT]
+):
+    def _iter_columns(self, df: DataFrameT, /) -> Iterator[SeriesT]:
+        yield from df.iter_columns()
+
+
 class LazySelectorNamespace(
     CompliantSelectorNamespace[FrameT, SeriesT], Protocol[FrameT, SeriesT]
 ):
diff --git a/narwhals/typing.py b/narwhals/typing.py
index c1d256ea72..e409568430 100644
--- a/narwhals/typing.py
+++ b/narwhals/typing.py
@@ -3,6 +3,7 @@
 from typing import TYPE_CHECKING
 from typing import Any
 from typing import Callable
+from typing import Iterator
 from typing import Literal
 from typing import Protocol
 from typing import Sequence
@@ -83,6 +84,7 @@ def columns(self) -> Sequence[str]: ...
     @property
     def schema(self) -> Mapping[str, DType]: ...
     def get_column(self, name: str) -> CompliantSeriesT_co: ...
+    def iter_columns(self) -> Iterator[CompliantSeriesT_co]: ...
 
 
 class CompliantLazyFrame(Protocol):

From c180c7efbd85ed78810b04625fc9ee87681e89d3 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sun, 2 Mar 2025 18:21:18 +0000
Subject: [PATCH 51/55] refactor: define `CompliantLazyFrame._iter_columns`

- Required some compatibility aliasing for `pandas`, `pyarrow`
  - They're faux-lazy

https://github.com/narwhals-dev/narwhals/pull/2064#discussion_r1976486623
---
 narwhals/_arrow/dataframe.py       |  2 ++
 narwhals/_dask/dataframe.py        |  6 ++++++
 narwhals/_dask/selectors.py        |  5 -----
 narwhals/_duckdb/dataframe.py      |  5 +++++
 narwhals/_duckdb/selectors.py      |  7 -------
 narwhals/_pandas_like/dataframe.py |  2 ++
 narwhals/_polars/dataframe.py      |  3 +++
 narwhals/_selectors.py             | 10 +++++++---
 narwhals/_spark_like/dataframe.py  |  5 +++++
 narwhals/_spark_like/selectors.py  |  5 -----
 narwhals/typing.py                 |  1 +
 11 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py
index 2cb84475d0..e0709a84ff 100644
--- a/narwhals/_arrow/dataframe.py
+++ b/narwhals/_arrow/dataframe.py
@@ -164,6 +164,8 @@ def iter_columns(self) -> Iterator[ArrowSeries]:
                 version=self._version,
             )
 
+    _iter_columns = iter_columns
+
     def iter_rows(
         self: Self, *, named: bool, buffer_size: int
     ) -> Iterator[tuple[Any, ...]] | Iterator[dict[str, Any]]:
diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py
index e77d05d742..e8dde409a3 100644
--- a/narwhals/_dask/dataframe.py
+++ b/narwhals/_dask/dataframe.py
@@ -2,6 +2,7 @@
 
 from typing import TYPE_CHECKING
 from typing import Any
+from typing import Iterator
 from typing import Literal
 from typing import Sequence
 
@@ -24,6 +25,7 @@
 if TYPE_CHECKING:
     from types import ModuleType
 
+    import dask.dataframe.dask_expr as dx
     from typing_extensions import Self
 
     from narwhals._dask.expr import DaskExpr
@@ -79,6 +81,10 @@ def _from_native_frame(self: Self, df: Any) -> Self:
             version=self._version,
         )
 
+    def _iter_columns(self) -> Iterator[dx.Series]:
+        for _col, ser in self._native_frame.items():  # noqa: PERF102
+            yield ser
+
     def with_columns(self: Self, *exprs: DaskExpr) -> Self:
         df = self._native_frame
         new_series = evaluate_exprs(self, *exprs)
diff --git a/narwhals/_dask/selectors.py b/narwhals/_dask/selectors.py
index bd42fc76c7..59c8dba474 100644
--- a/narwhals/_dask/selectors.py
+++ b/narwhals/_dask/selectors.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 from typing import TYPE_CHECKING
-from typing import Iterator
 
 from narwhals._dask.expr import DaskExpr
 from narwhals._selectors import CompliantSelector
@@ -27,10 +26,6 @@
 
 
 class DaskSelectorNamespace(LazySelectorNamespace["DaskLazyFrame", "dx.Series"]):  # pyright: ignore[reportInvalidTypeArguments]
-    def _iter_columns(self, df: DaskLazyFrame) -> Iterator[dx.Series]:
-        for _col, ser in df._native_frame.items():  # noqa: PERF102
-            yield ser
-
     def _selector(
         self,
         call: EvalSeries[DaskLazyFrame, dx.Series],  # pyright: ignore[reportInvalidTypeForm]
diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py
index dc74eae824..e1fa303bd9 100644
--- a/narwhals/_duckdb/dataframe.py
+++ b/narwhals/_duckdb/dataframe.py
@@ -2,6 +2,7 @@
 
 from typing import TYPE_CHECKING
 from typing import Any
+from typing import Iterator
 from typing import Literal
 from typing import Sequence
 
@@ -85,6 +86,10 @@ def __getitem__(self: Self, item: str) -> DuckDBInterchangeSeries:
             self._native_frame.select(item), version=self._version
         )
 
+    def _iter_columns(self) -> Iterator[duckdb.Expression]:
+        for col in self.columns:
+            yield ColumnExpression(col)
+
     def collect(
         self: Self,
         backend: ModuleType | Implementation | str | None,
diff --git a/narwhals/_duckdb/selectors.py b/narwhals/_duckdb/selectors.py
index 9e99f0e78f..0e54fd3c76 100644
--- a/narwhals/_duckdb/selectors.py
+++ b/narwhals/_duckdb/selectors.py
@@ -1,9 +1,6 @@
 from __future__ import annotations
 
 from typing import TYPE_CHECKING
-from typing import Iterator
-
-from duckdb import ColumnExpression
 
 from narwhals._duckdb.expr import DuckDBExpr
 from narwhals._selectors import CompliantSelector
@@ -22,10 +19,6 @@
 class DuckDBSelectorNamespace(
     LazySelectorNamespace["DuckDBLazyFrame", "duckdb.Expression"]  # type: ignore[type-var]
 ):
-    def _iter_columns(self, df: DuckDBLazyFrame) -> Iterator[duckdb.Expression]:
-        for col in df.columns:
-            yield ColumnExpression(col)
-
     def _selector(
         self,
         call: EvalSeries[DuckDBLazyFrame, duckdb.Expression],  # type: ignore[type-var]
diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py
index f2a336146e..e958575461 100644
--- a/narwhals/_pandas_like/dataframe.py
+++ b/narwhals/_pandas_like/dataframe.py
@@ -350,6 +350,8 @@ def iter_columns(self) -> Iterator[PandasLikeSeries]:
                 version=self._version,
             )
 
+    _iter_columns = iter_columns
+
     def iter_rows(
         self: Self,
         *,
diff --git a/narwhals/_polars/dataframe.py b/narwhals/_polars/dataframe.py
index bc25184e11..4683d59e7a 100644
--- a/narwhals/_polars/dataframe.py
+++ b/narwhals/_polars/dataframe.py
@@ -438,6 +438,9 @@ def func(*args: Any, **kwargs: Any) -> Any:
 
         return func
 
+    def _iter_columns(self) -> Iterator[PolarsSeries]:  # pragma: no cover
+        yield from self.collect(self._implementation).iter_columns()
+
     @property
     def columns(self: Self) -> list[str]:
         return self._native_frame.columns
diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index 9ef7b20c9c..ac4f3cc0fa 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -53,6 +53,7 @@
 SeriesT = TypeVar("SeriesT", bound="CompliantSeries")
 FrameT = TypeVar("FrameT", bound="CompliantDataFrame[Any] | CompliantLazyFrame")
 DataFrameT = TypeVar("DataFrameT", bound="CompliantDataFrame[Any]")
+LazyFrameT = TypeVar("LazyFrameT", bound="CompliantLazyFrame")
 SelectorOrExpr: TypeAlias = (
     "CompliantSelector[FrameT, SeriesT] | CompliantExpr[FrameT, SeriesT]"
 )
@@ -177,12 +178,15 @@ def _iter_columns(self, df: DataFrameT, /) -> Iterator[SeriesT]:
 
 
 class LazySelectorNamespace(
-    CompliantSelectorNamespace[FrameT, SeriesT], Protocol[FrameT, SeriesT]
+    CompliantSelectorNamespace[LazyFrameT, SeriesT], Protocol[LazyFrameT, SeriesT]
 ):
-    def _iter_schema(self, df: FrameT) -> Iterator[tuple[str, DType]]:
+    def _iter_schema(self, df: LazyFrameT) -> Iterator[tuple[str, DType]]:
         yield from df.schema.items()
 
-    def _iter_columns_dtypes(self, df: FrameT, /) -> Iterator[tuple[SeriesT, DType]]:
+    def _iter_columns(self, df: LazyFrameT) -> Iterator[SeriesT]:
+        yield from df._iter_columns()
+
+    def _iter_columns_dtypes(self, df: LazyFrameT, /) -> Iterator[tuple[SeriesT, DType]]:
         yield from zip(self._iter_columns(df), df.schema.values())
 
 
diff --git a/narwhals/_spark_like/dataframe.py b/narwhals/_spark_like/dataframe.py
index d4a792e678..ec5d7aafd7 100644
--- a/narwhals/_spark_like/dataframe.py
+++ b/narwhals/_spark_like/dataframe.py
@@ -4,6 +4,7 @@
 from importlib import import_module
 from typing import TYPE_CHECKING
 from typing import Any
+from typing import Iterator
 from typing import Literal
 from typing import Sequence
 from typing import cast
@@ -195,6 +196,10 @@ def _collect_to_arrow(self) -> pa.Table:
             to_arrow: Incomplete = self._native_frame.toArrow
             return to_arrow()
 
+    def _iter_columns(self) -> Iterator[Column]:
+        for col in self.columns:
+            yield self._F.col(col)
+
     @property
     def columns(self: Self) -> list[str]:
         return list(self.schema)
diff --git a/narwhals/_spark_like/selectors.py b/narwhals/_spark_like/selectors.py
index d29cbf82ed..eb7ab72fae 100644
--- a/narwhals/_spark_like/selectors.py
+++ b/narwhals/_spark_like/selectors.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 from typing import TYPE_CHECKING
-from typing import Iterator
 
 from narwhals._selectors import CompliantSelector
 from narwhals._selectors import LazySelectorNamespace
@@ -19,10 +18,6 @@
 
 # NOTE: See issue regarding ignores (#2044)
 class SparkLikeSelectorNamespace(LazySelectorNamespace["SparkLikeLazyFrame", "Column"]):  # type: ignore[type-var]
-    def _iter_columns(self, df: SparkLikeLazyFrame) -> Iterator[Column]:
-        for col in df.columns:
-            yield df._F.col(col)
-
     def _selector(
         self,
         call: EvalSeries[SparkLikeLazyFrame, Column],  # type: ignore[type-var]
diff --git a/narwhals/typing.py b/narwhals/typing.py
index e409568430..30123513f4 100644
--- a/narwhals/typing.py
+++ b/narwhals/typing.py
@@ -101,6 +101,7 @@ def aggregate(self, *exprs: Any) -> Self:
     def columns(self) -> Sequence[str]: ...
     @property
     def schema(self) -> Mapping[str, DType]: ...
+    def _iter_columns(self) -> Iterator[Any]: ...
 
 
 CompliantFrameT = TypeVar(

From e85c934bec1bc3908aeab558b2bd75de64554041 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sun, 2 Mar 2025 18:22:12 +0000
Subject: [PATCH 52/55] docs: move note back into code

https://github.com/narwhals-dev/narwhals/pull/2064#discussion_r1976671296
---
 narwhals/_selectors.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index ac4f3cc0fa..0b7055fe18 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -80,6 +80,10 @@ def _iter_schema(self, df: FrameT, /) -> Iterator[tuple[str, DType]]:
             yield ser.name, ser.dtype
 
     def _iter_columns_dtypes(self, df: FrameT, /) -> Iterator[tuple[SeriesT, DType]]:
+        # NOTE: Defined to be overriden for lazy
+        # - Their `SeriesT` is a **native** object
+        #   - `.dtype` won't return a `nw.DType` (or maybe anything) for lazy backends
+        #   - See (https://github.com/narwhals-dev/narwhals/issues/2044)
         for ser in self._iter_columns(df):
             yield ser, ser.dtype
 

From a43d5e5732b69a49bdc880a675b3e01ba46e81a9 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sun, 2 Mar 2025 18:23:28 +0000
Subject: [PATCH 53/55] refactor: remove duplicate import

---
 narwhals/_dask/selectors.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/narwhals/_dask/selectors.py b/narwhals/_dask/selectors.py
index 59c8dba474..9533721d49 100644
--- a/narwhals/_dask/selectors.py
+++ b/narwhals/_dask/selectors.py
@@ -19,11 +19,6 @@
     from narwhals._selectors import EvalSeries
     from narwhals.utils import _FullContext
 
-    try:
-        import dask.dataframe.dask_expr as dx
-    except ModuleNotFoundError:
-        import dask_expr as dx
-
 
 class DaskSelectorNamespace(LazySelectorNamespace["DaskLazyFrame", "dx.Series"]):  # pyright: ignore[reportInvalidTypeArguments]
     def _selector(

From f5765ffcc92cc49fc2fb1967ab13ea4d7ab79635 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sun, 2 Mar 2025 18:26:26 +0000
Subject: [PATCH 54/55] docs: fix typo (`overriden` -> `overridden`)

https://results.pre-commit.ci/run/github/760058710/1740939895.mH7-kxPiRby66pjYWonI9A
---
 narwhals/_selectors.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index 0b7055fe18..41b6248e21 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -80,7 +80,7 @@ def _iter_schema(self, df: FrameT, /) -> Iterator[tuple[str, DType]]:
             yield ser.name, ser.dtype
 
     def _iter_columns_dtypes(self, df: FrameT, /) -> Iterator[tuple[SeriesT, DType]]:
-        # NOTE: Defined to be overriden for lazy
+        # NOTE: Defined to be overridden for lazy
         # - Their `SeriesT` is a **native** object
         #   - `.dtype` won't return a `nw.DType` (or maybe anything) for lazy backends
         #   - See (https://github.com/narwhals-dev/narwhals/issues/2044)

From cd31cbe243bdc150ae1e851dbce18b32ec3301f0 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sun, 2 Mar 2025 18:33:58 +0000
Subject: [PATCH 55/55] docs: add notes on `3.8` compat code

https://github.com/narwhals-dev/narwhals/pull/2064#pullrequestreview-2652771711
---
 narwhals/_selectors.py | 2 ++
 narwhals/typing.py     | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/narwhals/_selectors.py b/narwhals/_selectors.py
index 41b6248e21..639b0eb740 100644
--- a/narwhals/_selectors.py
+++ b/narwhals/_selectors.py
@@ -23,6 +23,8 @@
 from narwhals.utils import is_tracks_depth
 
 if not TYPE_CHECKING:  # pragma: no cover
+    # TODO @dangotbanned: Remove after dropping `3.8` (#2084)
+    # - https://github.com/narwhals-dev/narwhals/pull/2064#discussion_r1965921386
     import sys
 
     if sys.version_info >= (3, 9):
diff --git a/narwhals/typing.py b/narwhals/typing.py
index 30123513f4..d131a32ec5 100644
--- a/narwhals/typing.py
+++ b/narwhals/typing.py
@@ -18,6 +18,8 @@
     else:
         from typing import Generic as Protocol38
 else:
+    # TODO @dangotbanned: Remove after dropping `3.8` (#2084)
+    # - https://github.com/narwhals-dev/narwhals/pull/2064#discussion_r1965921386
     from typing import Protocol as Protocol38
 
 if TYPE_CHECKING: