diff --git a/.github/workflows/typing.yml b/.github/workflows/typing.yml index 43c81d9a2f..33532531cd 100644 --- a/.github/workflows/typing.yml +++ b/.github/workflows/typing.yml @@ -31,7 +31,7 @@ jobs: run: uv venv .venv - name: install-reqs # TODO: add more dependencies/backends incrementally - run: uv pip install -e ".[pyspark]" --group core --group typing + run: uv pip install -e ".[pyspark]" --group core --group typing-ci - name: show-deps run: uv pip freeze - name: Run mypy and pyright diff --git a/Makefile b/Makefile index 44c744614f..c404003756 100644 --- a/Makefile +++ b/Makefile @@ -20,6 +20,6 @@ help: ## Display this help screen .PHONY: typing typing: ## Run typing checks - $(VENV_BIN)/uv pip install -e . --group typing + $(VENV_BIN)/uv pip install -e . --group typing-ci $(VENV_BIN)/pyright $(VENV_BIN)/mypy diff --git a/docs/api-reference/dataframe.md b/docs/api-reference/dataframe.md index 1524464c92..34762bdddb 100644 --- a/docs/api-reference/dataframe.md +++ b/docs/api-reference/dataframe.md @@ -58,3 +58,4 @@ - write_parquet show_source: false show_bases: false + inherited_members: true diff --git a/docs/api-reference/lazyframe.md b/docs/api-reference/lazyframe.md index b27800d8d9..970a87639c 100644 --- a/docs/api-reference/lazyframe.md +++ b/docs/api-reference/lazyframe.md @@ -34,3 +34,4 @@ show_root_heading: false show_source: false show_bases: false + inherited_members: true diff --git a/narwhals/_namespace.py b/narwhals/_namespace.py index efe2a2bcdf..ffd7a79390 100644 --- a/narwhals/_namespace.py +++ b/narwhals/_namespace.py @@ -7,9 +7,9 @@ Any, Callable, Generic, - Literal, Protocol, TypeVar, + cast, overload, ) @@ -37,8 +37,6 @@ import pandas as pd import polars as pl import pyarrow as pa - import pyspark.sql as pyspark_sql - from pyspark.sql.connect.dataframe import DataFrame as PySparkConnectDataFrame from typing_extensions import Self, TypeAlias, TypeIs from narwhals._arrow.namespace import ArrowNamespace @@ -68,30 +66,33 @@ _Guard: 
TypeAlias = "Callable[[Any], TypeIs[T]]" EagerAllowedNamespace: TypeAlias = "Namespace[PandasLikeNamespace] | Namespace[ArrowNamespace] | Namespace[PolarsNamespace]" + Incomplete: TypeAlias = Any class _BasePandasLike(Sized, Protocol): index: Any """`mypy` doesn't like the asymmetric `property` setter in `pandas`.""" def __getitem__(self, key: Any, /) -> Any: ... - def __mul__(self, other: float | Collection[float] | Self) -> Self: ... - def __floordiv__(self, other: float | Collection[float] | Self) -> Self: ... + def __mul__(self, other: float | Collection[float] | Self, /) -> Self: ... + def __floordiv__(self, other: float | Collection[float] | Self, /) -> Self: ... @property def loc(self) -> Any: ... @property def shape(self) -> tuple[int, ...]: ... def set_axis(self, labels: Any, *, axis: Any = ..., copy: bool = ...) -> Self: ... def copy(self, deep: bool = ...) -> Self: ... # noqa: FBT001 - def rename(self, *args: Any, inplace: Literal[False], **kwds: Any) -> Self: - """`inplace=False` is required to avoid (incorrect?) default overloads.""" - ... + def rename(self, *args: Any, **kwds: Any) -> Self | Incomplete: + """`mypy` & `pyright` disagree on overloads. + + `Incomplete` used to fix [more important issue](https://github.com/narwhals-dev/narwhals/pull/3016#discussion_r2296139744). + """ class _BasePandasLikeFrame(NativeDataFrame, _BasePandasLike, Protocol): ... class _BasePandasLikeSeries(NativeSeries, _BasePandasLike, Protocol): - def where(self, cond: Any, other: Any = ..., **kwds: Any) -> Any: ... + def where(self, cond: Any, other: Any = ..., /) -> Self | Incomplete: ... 
- class _NativeDask(Protocol): + class _NativeDask(NativeLazyFrame, Protocol): _partition_type: type[pd.DataFrame] class _CuDFDataFrame(_BasePandasLikeFrame, Protocol): @@ -112,6 +113,12 @@ class _ModinDataFrame(_BasePandasLikeFrame, Protocol): class _ModinSeries(_BasePandasLikeSeries, Protocol): _pandas_class: type[pd.Series[Any]] + # NOTE: Using `pyspark.sql.DataFrame` creates false positives in overloads when not installed + class _PySparkDataFrame(NativeLazyFrame, Protocol): + # Arbitrary method that `sqlframe` doesn't have and unlikely to appear anywhere else + # https://github.com/apache/spark/blob/8530444e25b83971da4314c608aa7d763adeceb3/python/pyspark/sql/dataframe.py#L4875 + def dropDuplicatesWithinWatermark(self, *arg: Any, **kwargs: Any) -> Any: ... # noqa: N802 + _NativePolars: TypeAlias = "pl.DataFrame | pl.LazyFrame | pl.Series" _NativeArrow: TypeAlias = "pa.Table | pa.ChunkedArray[Any]" _NativeDuckDB: TypeAlias = "duckdb.DuckDBPyRelation" @@ -124,8 +131,8 @@ class _ModinSeries(_BasePandasLikeSeries, Protocol): ) _NativePandasLike: TypeAlias = "_NativePandasLikeDataFrame |_NativePandasLikeSeries" _NativeSQLFrame: TypeAlias = "SQLFrameDataFrame" - _NativePySpark: TypeAlias = "pyspark_sql.DataFrame" - _NativePySparkConnect: TypeAlias = "PySparkConnectDataFrame" + _NativePySpark: TypeAlias = _PySparkDataFrame + _NativePySparkConnect: TypeAlias = _PySparkDataFrame _NativeSparkLike: TypeAlias = ( "_NativeSQLFrame | _NativePySpark | _NativePySparkConnect" ) @@ -371,8 +378,10 @@ def is_native_dask(obj: Any) -> TypeIs[_NativeDask]: is_native_duckdb: _Guard[_NativeDuckDB] = is_duckdb_relation is_native_sqlframe: _Guard[_NativeSQLFrame] = is_sqlframe_dataframe -is_native_pyspark: _Guard[_NativePySpark] = is_pyspark_dataframe -is_native_pyspark_connect: _Guard[_NativePySparkConnect] = is_pyspark_connect_dataframe +is_native_pyspark = cast("_Guard[_NativePySpark]", is_pyspark_dataframe) +is_native_pyspark_connect = cast( + "_Guard[_NativePySparkConnect]", 
is_pyspark_connect_dataframe +) def is_native_pandas(obj: Any) -> TypeIs[_NativePandas]: diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 9e0102b624..a24f429351 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -3,7 +3,7 @@ import functools import operator import re -from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar +from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar, cast import pandas as pd @@ -202,8 +202,10 @@ def rename( if implementation is Implementation.PANDAS and ( implementation._backend_version() >= (3,) ): # pragma: no cover - return obj.rename(*args, **kwargs, inplace=False) - return obj.rename(*args, **kwargs, copy=False, inplace=False) + result = obj.rename(*args, **kwargs, inplace=False) + else: + result = obj.rename(*args, **kwargs, copy=False, inplace=False) + return cast("NativeNDFrameT", result) # type: ignore[redundant-cast] @functools.lru_cache(maxsize=16) diff --git a/narwhals/_utils.py b/narwhals/_utils.py index 7c9bf6c820..4d26dd5673 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -73,14 +73,40 @@ NativeSeriesT_co, ) from narwhals._compliant.typing import EvalNames, NativeDataFrameT, NativeLazyFrameT - from narwhals._namespace import Namespace + from narwhals._namespace import ( + Namespace, + _NativeArrow, + _NativeCuDF, + _NativeDask, + _NativeDuckDB, + _NativeIbis, + _NativeModin, + _NativePandas, + _NativePandasLike, + _NativePolars, + _NativePySpark, + _NativePySparkConnect, + _NativeSQLFrame, + ) from narwhals._translate import ArrowStreamExportable, IntoArrowTable, ToNarwhalsT_co from narwhals._typing import ( Backend, IntoBackend, + _ArrowImpl, + _CudfImpl, + _DaskImpl, + _DuckDBImpl, _EagerAllowedImpl, + _IbisImpl, _LazyAllowedImpl, _LazyFrameCollectImpl, + _ModinImpl, + _PandasImpl, + _PandasLikeImpl, + _PolarsImpl, + _PySparkConnectImpl, + _PySparkImpl, + _SQLFrameImpl, ) from narwhals.dataframe import DataFrame, 
LazyFrame from narwhals.dtypes import DType @@ -141,7 +167,7 @@ def columns(self) -> Sequence[str]: ... _Constructor: TypeAlias = "Callable[Concatenate[_T, P], R2]" -class _StoresNative(Protocol[NativeT_co]): # noqa: PYI046 +class _StoresNative(Protocol[NativeT_co]): """Provides access to a native object. Native objects have types like: @@ -2034,3 +2060,91 @@ def deep_attrgetter(attr: str, *nested: str) -> attrgetter[Any]: def deep_getattr(obj: Any, name_1: str, *nested: str) -> Any: """Perform a nested attribute lookup on `obj`.""" return deep_attrgetter(name_1, *nested)(obj) + + +class Compliant( + _StoresNative[NativeT_co], _StoresImplementation, Protocol[NativeT_co] ): ... + + +class Narwhals(Protocol[NativeT_co]): + """Minimal *Narwhals-level* protocol. + + Provides access to a compliant object: + + obj: Narwhals[NativeT_co] + compliant: Compliant[NativeT_co] = obj._compliant + + Which itself exposes: + + implementation: Implementation = compliant.implementation + native: NativeT_co = compliant.native + + This interface is used for revealing which `Implementation` member is associated with **either**: + - One or more [nominal] native type(s) + - One or more [structural] type(s) + - where the true native type(s) are [assignable to] *at least* one of them + + These relationships are defined in the `@overload`s of `_Implementation.__get__(...)`. + + [nominal]: https://typing.python.org/en/latest/spec/glossary.html#term-nominal + [structural]: https://typing.python.org/en/latest/spec/glossary.html#term-structural + [assignable to]: https://typing.python.org/en/latest/spec/glossary.html#term-assignable + """ + + @property + def _compliant(self) -> Compliant[NativeT_co]: ... + + +class _Implementation: + """Descriptor for matching an opaque `Implementation` on a generic class. 
+ + Based on [pyright comment](https://github.com/microsoft/pyright/issues/3071#issuecomment-1043978070) + """ + + def __set_name__(self, owner: type[Any], name: str) -> None: + self.__name__: str = name + + @overload + def __get__(self, instance: Narwhals[_NativePolars], owner: Any) -> _PolarsImpl: ... + @overload + def __get__(self, instance: Narwhals[_NativePandas], owner: Any) -> _PandasImpl: ... + @overload + def __get__(self, instance: Narwhals[_NativeModin], owner: Any) -> _ModinImpl: ... + @overload # TODO @dangotbanned: Rename `_typing` `*Cudf*` aliases to `*CuDF*` + def __get__(self, instance: Narwhals[_NativeCuDF], owner: Any) -> _CudfImpl: ... + @overload + def __get__( + self, instance: Narwhals[_NativePandasLike], owner: Any + ) -> _PandasLikeImpl: ... + @overload + def __get__(self, instance: Narwhals[_NativeArrow], owner: Any) -> _ArrowImpl: ... + @overload + def __get__( + self, instance: Narwhals[_NativePolars | _NativeArrow | _NativePandas], owner: Any + ) -> _PolarsImpl | _PandasImpl | _ArrowImpl: ... + @overload + def __get__(self, instance: Narwhals[_NativeDuckDB], owner: Any) -> _DuckDBImpl: ... + @overload + def __get__( + self, instance: Narwhals[_NativeSQLFrame], owner: Any + ) -> _SQLFrameImpl: ... + @overload + def __get__(self, instance: Narwhals[_NativeDask], owner: Any) -> _DaskImpl: ... + @overload + def __get__(self, instance: Narwhals[_NativeIbis], owner: Any) -> _IbisImpl: ... + @overload + def __get__( + self, instance: Narwhals[_NativePySpark | _NativePySparkConnect], owner: Any + ) -> _PySparkImpl | _PySparkConnectImpl: ... + # NOTE: https://docs.python.org/3/howto/descriptor.html#invocation-from-a-class + @overload + def __get__(self, instance: None, owner: type[Narwhals[Any]]) -> Self: ... + @overload + def __get__( + self, instance: DataFrame[Any] | Series[Any], owner: Any + ) -> _EagerAllowedImpl: ... + @overload + def __get__(self, instance: LazyFrame[Any], owner: Any) -> _LazyAllowedImpl: ... 
+ def __get__(self, instance: Narwhals[Any] | None, owner: Any) -> Any: + return self if instance is None else instance._compliant._implementation diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 0519dab653..c0f5e1e05d 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -27,6 +27,7 @@ from narwhals._utils import ( Implementation, Version, + _Implementation, can_lazyframe_collect, check_columns_exist, flatten, @@ -105,6 +106,31 @@ class BaseFrame(Generic[_FrameT]): _compliant_frame: Any _level: Literal["full", "lazy", "interchange"] + implementation: _Implementation = _Implementation() + """Return [`narwhals.Implementation`][] of native frame. + + This can be useful when you need to use special-casing for features outside of + Narwhals' scope - for example, when dealing with pandas' Period Dtype. + + Examples: + >>> import narwhals as nw + >>> import pandas as pd + >>> df_native = pd.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation + + >>> df.implementation.is_pandas() + True + >>> df.implementation.is_pandas_like() + True + >>> df.implementation.is_polars() + False + """ + + @property + @abstractmethod + def _compliant(self) -> Any: ... + def __native_namespace__(self) -> ModuleType: return self._compliant_frame.__native_namespace__() # type: ignore[no-any-return] @@ -446,6 +472,10 @@ class DataFrame(BaseFrame[DataFrameT]): _version: ClassVar[Version] = Version.MAIN + @property + def _compliant(self) -> CompliantDataFrame[Any, Any, DataFrameT, Self]: + return self._compliant_frame + def _extract_compliant(self, arg: Any) -> Any: if is_into_expr_eager(arg): plx: EagerNamespaceAny = self.__narwhals_namespace__() @@ -652,29 +682,6 @@ def from_numpy( ) raise ValueError(msg) - @property - def implementation(self) -> Implementation: - """Return implementation of native frame. 
- - This can be useful when you need to use special-casing for features outside of - Narwhals' scope - for example, when dealing with pandas' Period Dtype. - - Examples: - >>> import narwhals as nw - >>> import pandas as pd - >>> df_native = pd.DataFrame({"a": [1, 2, 3]}) - >>> df = nw.from_native(df_native) - >>> df.implementation - - >>> df.implementation.is_pandas() - True - >>> df.implementation.is_pandas_like() - True - >>> df.implementation.is_polars() - False - """ - return self._compliant_frame._implementation - def __len__(self) -> int: return self._compliant_frame.__len__() @@ -2276,6 +2283,10 @@ class LazyFrame(BaseFrame[LazyFrameT]): ``` """ + @property + def _compliant(self) -> CompliantLazyFrame[Any, LazyFrameT, Self]: + return self._compliant_frame + def _extract_compliant(self, arg: Any) -> Any: from narwhals.expr import Expr from narwhals.series import Series @@ -2327,22 +2338,6 @@ def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> def __repr__(self) -> str: # pragma: no cover return generate_repr("Narwhals LazyFrame", self.to_native().__repr__()) - @property - def implementation(self) -> Implementation: - """Return implementation of native frame. - - This can be useful when you need to use special-casing for features outside of - Narwhals' scope - for example, when dealing with pandas' Period Dtype. 
- - Examples: - >>> import narwhals as nw - >>> import dask.dataframe as dd - >>> lf_native = dd.from_dict({"a": [1, 2]}, npartitions=1) - >>> nw.from_native(lf_native).implementation - - """ - return self._compliant_frame._implementation - def __getitem__(self, item: str | slice) -> NoReturn: msg = "Slicing is not supported on LazyFrame" raise TypeError(msg) diff --git a/narwhals/series.py b/narwhals/series.py index 1a5f29a75c..c28ff05464 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -7,6 +7,7 @@ from narwhals._utils import ( Implementation, Version, + _Implementation, _validate_rolling_arguments, ensure_type, generate_repr, @@ -78,6 +79,10 @@ class Series(Generic[IntoSeriesT]): _version: ClassVar[Version] = Version.MAIN + @property + def _compliant(self) -> CompliantSeries[IntoSeriesT]: + return self._compliant_series + @property def _dataframe(self) -> type[DataFrame[Any]]: from narwhals.dataframe import DataFrame @@ -226,33 +231,26 @@ def from_iterable( ) raise ValueError(msg) - @property - def implementation(self) -> Implementation: - """Return implementation of native Series. - - This can be useful when you need to use special-casing for features outside of - Narwhals' scope - for example, when dealing with pandas' Period Dtype. - - Examples: - >>> import narwhals as nw - >>> import pandas as pd - - >>> s_native = pd.Series([1, 2, 3]) - >>> s = nw.from_native(s_native, series_only=True) - - >>> s.implementation - - - >>> s.implementation.is_pandas() - True - - >>> s.implementation.is_pandas_like() - True - - >>> s.implementation.is_polars() - False - """ - return self._compliant_series._implementation + implementation: _Implementation = _Implementation() + """Return [`narwhals.Implementation`][] of native Series. + + This can be useful when you need to use special-casing for features outside of + Narwhals' scope - for example, when dealing with pandas' Period Dtype. 
+ + Examples: + >>> import narwhals as nw + >>> import pandas as pd + >>> s_native = pd.Series([1, 2, 3]) + >>> s = nw.from_native(s_native, series_only=True) + >>> s.implementation + + >>> s.implementation.is_pandas() + True + >>> s.implementation.is_pandas_like() + True + >>> s.implementation.is_polars() + False + """ def __bool__(self) -> NoReturn: msg = ( diff --git a/pyproject.toml b/pyproject.toml index 0c5d157ab8..303e0a0bf4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,6 +76,10 @@ typing = [ # keep some of these pinned and bump periodically so there's fewer s "uv", "narwhals[ibis]", ] +typing-ci = [ + "narwhals[dask,modin]", + {include-group = "typing"} +] docs = [ "black", # required by mkdocstrings_handlers "jinja2", diff --git a/tests/expr_and_series/is_close_test.py b/tests/expr_and_series/is_close_test.py index a40ee0312e..c5bb4df551 100644 --- a/tests/expr_and_series/is_close_test.py +++ b/tests/expr_and_series/is_close_test.py @@ -11,7 +11,6 @@ import pytest import narwhals as nw -from narwhals._utils import is_eager_allowed from narwhals.exceptions import ComputeError, InvalidOperationError from tests.conftest import ( dask_lazy_p1_constructor, @@ -114,11 +113,8 @@ def test_is_close_series_with_series( ) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) x, y = df["x"], df["y"] - backend = df.implementation - assert is_eager_allowed(backend) - nulls = nw.new_series( - name="nulls", values=[None] * len(x), dtype=nw.Float64(), backend=backend + "nulls", [None] * len(x), nw.Float64(), backend=df.implementation ) x = x.zip_with(x != NAN_PLACEHOLDER, x**0.5).zip_with(x != NULL_PLACEHOLDER, nulls) y = y.zip_with(y != NAN_PLACEHOLDER, y**0.5).zip_with(y != NULL_PLACEHOLDER, nulls) @@ -141,11 +137,8 @@ def test_is_close_series_with_scalar( ) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) y = df["y"] - backend = df.implementation - assert is_eager_allowed(backend) - nulls = nw.new_series( - name="nulls", 
values=[None] * len(y), dtype=nw.Float64(), backend=backend + "nulls", [None] * len(y), nw.Float64(), backend=df.implementation ) y = y.zip_with(y != NAN_PLACEHOLDER, y**0.5).zip_with(y != NULL_PLACEHOLDER, nulls) result = y.is_close(other, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal) diff --git a/tests/implementation_test.py b/tests/implementation_test.py index c6f7ccffc8..1915e112d0 100644 --- a/tests/implementation_test.py +++ b/tests/implementation_test.py @@ -1,9 +1,28 @@ from __future__ import annotations +# Using pyright's assert type instead +# mypy: disable-error-code="assert-type" +from typing import TYPE_CHECKING, Any, cast + import pytest import narwhals as nw +if TYPE_CHECKING: + from narwhals._typing import ( + _ArrowImpl, + _DaskImpl, + _DuckDBImpl, + _EagerAllowedImpl, + _IbisImpl, + _LazyAllowedImpl, + _ModinImpl, + _PandasImpl, + _PolarsImpl, + _SQLFrameImpl, + ) + from narwhals.typing import IntoDataFrame, IntoLazyFrame, IntoSeries + def test_implementation_pandas() -> None: pytest.importorskip("pandas") @@ -52,3 +71,248 @@ def test_implementation_polars() -> None: ) def test_implementation_new(member: str, value: str) -> None: assert nw.Implementation(value) is getattr(nw.Implementation, member) + + +_TYPING_ONLY_TESTS = "_" +"""Exhaustive checks for overload matching native -> implementation. + +## Arrange +Each test defines a function accepting a `native` frame. + +Important: + We *must* use the concrete types and therefore the type checker *needs* the package installed. + +Next, wrap `native` as a `nw.{Data,Lazy}Frame`. + +Note: + If we support multiple native types, use `native` to generate `nw.{LazyFrame,Series}` as well. + +Finally, look-up `.implementation` on all wrapped objects. + +## Act +Try passing every result (`*_impl`) to functions that *only* accept a **subset** of `Implementation`. + +This step *may require* a `# (type|pyright): ignore` directive, which defines the `# [... Negative]` result. 
+Otherwise, results are labelled with `# [... Positive]`. + +If this *static* label matches *runtime* we use `# [True ...]`, otherwise `# [False ...]`. + +Tip: + `# [False Negative]`s are the most frustrating for users. + Always try to minimize warning on safe code. + +## Assert +The action determined whether or not our typing warns on an `@overload` match. + +We still need to use [`assert_type`] to verify which `Implementation`(s) were returned as a result. + +[`assert_type`]: https://typing-extensions.readthedocs.io/en/latest/#typing_extensions.assert_type +""" +if TYPE_CHECKING: + import dask.dataframe as dd + import duckdb + import ibis + import modin.pandas as mpd + import pandas as pd + import polars as pl + import pyarrow as pa + from sqlframe.base.dataframe import BaseDataFrame + from typing_extensions import assert_type + + any_df: nw.DataFrame[Any] = cast("nw.DataFrame[Any]", "") + any_ldf: nw.LazyFrame[Any] = cast("nw.LazyFrame[Any]", "") + any_ser: nw.Series[Any] = cast("nw.Series[Any]", "") + bound_df: nw.DataFrame[IntoDataFrame] = cast("nw.DataFrame[IntoDataFrame]", "") + bound_ldf: nw.LazyFrame[IntoLazyFrame] = cast("nw.LazyFrame[IntoLazyFrame]", "") + bound_ser: nw.Series[IntoSeries] = cast("nw.Series[IntoSeries]", "") + + def test_polars_typing(native: pl.DataFrame) -> None: + df = nw.from_native(native) + ldf = nw.from_native(native.lazy()) + ser = nw.from_native(native.to_series(), series_only=True) + + df_impl = df.implementation + ldf_impl = ldf.implementation + ser_impl = ser.implementation + + # [True Positive] + any_df.lazy(df_impl) + any_df.lazy(ldf_impl) + any_df.lazy(ser_impl) + any_ldf.collect(df_impl) + any_ldf.collect(ldf_impl) + any_ldf.collect(ser_impl) + + assert_type(df_impl, _PolarsImpl) + assert_type(ldf_impl, _PolarsImpl) + assert_type(ser_impl, _PolarsImpl) + + def test_pandas_typing(native: pd.DataFrame) -> None: + df = nw.from_native(native) + ldf = nw.from_native(native).lazy() + ser = nw.from_native(native.iloc[0], 
series_only=True) + + df_impl = df.implementation + ldf_impl = ldf.implementation + ser_impl = ser.implementation + + # [True Negative] + any_df.lazy(df_impl) # type: ignore[arg-type] + # [False Positive] + any_df.lazy(ldf_impl) + # [True Negative] + any_df.lazy(ser_impl) # pyright: ignore[reportArgumentType] + # [True Positive] + any_ldf.collect(df_impl) + any_ldf.collect(ldf_impl) + any_ldf.collect(ser_impl) + + assert_type(df_impl, _PandasImpl) + # NOTE: Would require adding overloads to `DataFrame.lazy` + assert_type(ldf_impl, _PandasImpl) # pyright: ignore[reportAssertTypeFailure] + assert_type(ser_impl, _PandasImpl) + + def test_arrow_typing(native: pa.Table) -> None: + df = nw.from_native(native) + ldf = nw.from_native(native).lazy() + ser = nw.from_native(native.column(0), series_only=True) + + df_impl = df.implementation + ldf_impl = ldf.implementation + ser_impl = ser.implementation + + # [True Negative] + any_df.lazy(df_impl) # type: ignore[arg-type] + # [False Positive] + any_df.lazy(ldf_impl) + # [True Negative] + any_df.lazy(ser_impl) # pyright: ignore[reportArgumentType] + # [True Positive] + any_ldf.collect(df_impl) + any_ldf.collect(ldf_impl) + any_ldf.collect(ser_impl) + + assert_type(df_impl, _ArrowImpl) + # NOTE: Would require adding overloads to `DataFrame.lazy` + assert_type(ldf_impl, _ArrowImpl) # pyright: ignore[reportAssertTypeFailure] + assert_type(ser_impl, _ArrowImpl) + + def test_duckdb_typing(native: duckdb.DuckDBPyRelation) -> None: + ldf = nw.from_native(native) + + ldf_impl = ldf.implementation + + # [True Positive] + any_df.lazy(ldf_impl) + # [True Negative] + any_ldf.collect(ldf_impl) # type: ignore[arg-type] + + assert_type(ldf.implementation, _DuckDBImpl) + + def test_sqlframe_typing(native: BaseDataFrame[Any, Any, Any, Any, Any]) -> None: + ldf = nw.from_native(native) + + ldf_impl = ldf.implementation + + # [True Positive] + any_df.lazy(ldf_impl) + # [True Negative] + any_ldf.collect(ldf_impl) # pyright: 
ignore[reportArgumentType] + + assert_type(ldf.implementation, _SQLFrameImpl) + + def test_ibis_typing(native: ibis.Table) -> None: + ldf = nw.from_native(native) + + ldf_impl = ldf.implementation + + # [True Positive] + any_df.lazy(ldf_impl) + # [True Negative] + any_ldf.collect(ldf_impl) # pyright: ignore[reportArgumentType] + + assert_type(ldf.implementation, _IbisImpl) + + def test_dask_typing(native: dd.DataFrame) -> None: + ldf = nw.from_native(native) + + ldf_impl = ldf.implementation + + # [True Positive] + any_df.lazy(ldf_impl) + # [True Negative] + any_ldf.collect(ldf_impl) # pyright: ignore[reportArgumentType] + + assert_type(ldf.implementation, _DaskImpl) + + def test_modin_typing(native: mpd.DataFrame) -> None: + df = nw.from_native(native) + # NOTE: Arbitrary method that returns a `Series` + ser = nw.from_native(native.duplicated(), series_only=True) + + df_impl = df.implementation + ser_impl = ser.implementation + + # [True Negative] + any_df.lazy(df_impl) # pyright: ignore[reportArgumentType] + any_df.lazy(ser_impl) # pyright: ignore[reportArgumentType] + any_ldf.collect(df_impl) # pyright: ignore[reportArgumentType] + any_ldf.collect(ser_impl) # pyright: ignore[reportArgumentType] + + assert_type(df_impl, _ModinImpl) + assert_type(ser_impl, _ModinImpl) + + def test_any_typing() -> None: + df_impl = any_df.implementation + ldf_impl = any_ldf.implementation + ser_impl = any_ser.implementation + + # [False Positive] + any_df.lazy(df_impl) + any_df.lazy(ldf_impl) + any_df.lazy(ser_impl) + any_ldf.collect(df_impl) + any_ldf.collect(ldf_impl) + any_ldf.collect(ser_impl) + + assert_type(df_impl, _EagerAllowedImpl) # pyright: ignore[reportAssertTypeFailure] + assert_type(ldf_impl, _LazyAllowedImpl) # pyright: ignore[reportAssertTypeFailure] + assert_type(ser_impl, _EagerAllowedImpl) # pyright: ignore[reportAssertTypeFailure] + # Fallback, matches the first overload `_PolarsImpl` + assert_type(df_impl, _PolarsImpl) + assert_type(ldf_impl, _PolarsImpl) + 
assert_type(ser_impl, _PolarsImpl) + + def test_bound_typing() -> None: + df_impl = bound_df.implementation + ldf_impl = bound_ldf.implementation + ser_impl = bound_ser.implementation + + # [True Negative] + any_df.lazy(df_impl) # type: ignore[arg-type] + # [True Positive] + any_df.lazy(ldf_impl) + # [True Negative] + any_df.lazy(ser_impl) # type: ignore[arg-type] + any_ldf.collect(df_impl) # type: ignore[arg-type] + any_ldf.collect(ldf_impl) # type: ignore[arg-type] + any_ldf.collect(ser_impl) # type: ignore[arg-type] + + assert_type(df_impl, _EagerAllowedImpl) + assert_type(ldf_impl, _LazyAllowedImpl) + assert_type(ser_impl, _EagerAllowedImpl) + + def test_mixed_eager_typing( + *args: nw.DataFrame[pl.DataFrame | pd.DataFrame | pa.Table] + | nw.Series[pl.Series | pd.Series[Any] | pa.ChunkedArray[Any]], + ) -> None: + # NOTE: Any combination of eager objects that **does not** include `cuDF`, `modin` should + # preserve that detail + mix_impl = args[0].implementation + + # [True Negative] + any_df.lazy(mix_impl) # type: ignore[arg-type] + # [True Positive] + any_ldf.collect(mix_impl) + + assert_type(mix_impl, _PolarsImpl | _PandasImpl | _ArrowImpl) diff --git a/tests/namespace_test.py b/tests/namespace_test.py index b1eae1df66..1445c0aad6 100644 --- a/tests/namespace_test.py +++ b/tests/namespace_test.py @@ -81,7 +81,7 @@ def test_namespace_from_native_object(constructor: Constructor) -> None: def test_namespace_from_native_object_invalid() -> None: data = {"a": [1, 2, 3], "b": [4, 5, 6]} with pytest.raises(TypeError, match=r"dict"): - Namespace.from_native_object(data) # pyright: ignore[reportCallIssue, reportArgumentType] + Namespace.from_native_object(data) # type: ignore[call-overload] @eager_allowed diff --git a/utils/check_api_reference.py b/utils/check_api_reference.py index 01c5ae889e..3233b24b1c 100644 --- a/utils/check_api_reference.py +++ b/utils/check_api_reference.py @@ -5,8 +5,8 @@ import sys # ruff: noqa: N806 -from collections import deque -from 
inspect import isfunction +from collections import OrderedDict, deque +from inspect import isfunction, ismethoddescriptor from pathlib import Path from types import MethodType, ModuleType from typing import TYPE_CHECKING, Any @@ -24,13 +24,14 @@ def _is_public_method_or_property(obj: Any) -> bool: return ( - isfunction(obj) or isinstance(obj, (MethodType, property)) + isfunction(obj) + or (isinstance(obj, (MethodType, property)) or ismethoddescriptor(obj)) ) and obj.__name__.startswith(LOWERCASE) else: def _is_public_method_or_property(obj: Any) -> bool: return ( - (isfunction(obj) or isinstance(obj, MethodType)) + (isfunction(obj) or (isinstance(obj, MethodType) or ismethoddescriptor(obj))) and obj.__name__.startswith(LOWERCASE) ) or (isinstance(obj, property) and obj.fget.__name__.startswith(LOWERCASE)) @@ -222,7 +223,11 @@ def read_documented_members(source: str | Path) -> list[str]: # Schema schema_methods = list(iter_api_reference_names(nw.Schema)) documented = read_documented_members(DIR_API_REF / "schema.md") -if missing := set(schema_methods).difference(documented): +if ( + missing := set(schema_methods) + .difference(documented) + .difference(iter_api_reference_names(OrderedDict)) +): print("Schema: not documented") # noqa: T201 print(missing) # noqa: T201 ret = 1