diff --git a/docs/api-reference/typing.md b/docs/api-reference/typing.md index ae8c983d27..dc9a483ee6 100644 --- a/docs/api-reference/typing.md +++ b/docs/api-reference/typing.md @@ -18,8 +18,6 @@ Narwhals comes fully statically typed. In addition to `nw.DataFrame`, `nw.Expr`, - IntoSeries - IntoSeriesT - IntoBackend - - IntoEagerBackend - - IntoLazyBackend - IntoDType - IntoSchema - SizeUnit diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index afc7e3f67e..bf41a7ed28 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -44,13 +44,12 @@ ) from narwhals._compliant.typing import CompliantDataFrameAny, CompliantLazyFrameAny from narwhals._translate import IntoArrowTable + from narwhals._typing import _EagerAllowedImpl, _LazyAllowedImpl from narwhals._utils import Version, _LimitedContext from narwhals.dtypes import DType from narwhals.typing import ( - EagerImplementation, IntoSchema, JoinStrategy, - LazyImplementation, SizedMultiIndexSelector, SizedMultiNameSelector, SizeUnit, @@ -513,7 +512,7 @@ def tail(self, n: int) -> Self: ) return self._with_native(df.slice(abs(n)), validate_column_names=False) - def lazy(self, backend: LazyImplementation | None = None) -> CompliantLazyFrameAny: + def lazy(self, backend: _LazyAllowedImpl | None = None) -> CompliantLazyFrameAny: if backend is None: return self if backend is Implementation.DUCKDB: @@ -559,7 +558,7 @@ def lazy(self, backend: LazyImplementation | None = None) -> CompliantLazyFrameA raise AssertionError # pragma: no cover def collect( - self, backend: EagerImplementation | None, **kwargs: Any + self, backend: _EagerAllowedImpl | None, **kwargs: Any ) -> CompliantDataFrameAny: if backend is Implementation.PYARROW or backend is None: from narwhals._arrow.dataframe import ArrowDataFrame diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py index 463750e262..1cd8023a32 100644 --- a/narwhals/_compliant/dataframe.py +++ b/narwhals/_compliant/dataframe.py @@ -49,16 +49,15 @@ from narwhals._compliant.group_by import CompliantGroupBy, DataFrameGroupBy from narwhals._compliant.namespace import EagerNamespace from narwhals._translate import IntoArrowTable + from narwhals._typing import _EagerAllowedImpl, _LazyAllowedImpl from narwhals._utils import Implementation, _LimitedContext from narwhals.dataframe import DataFrame from narwhals.dtypes import DType from narwhals.exceptions import ColumnNotFoundError from narwhals.typing import ( AsofJoinStrategy, - EagerImplementation, IntoSchema, JoinStrategy, - LazyImplementation, LazyUniqueKeepStrategy, MultiColSelector, MultiIndexSelector, @@ -155,7 +154,7 @@ def schema(self) -> Mapping[str, DType]: ... def shape(self) -> tuple[int, int]: ... def clone(self) -> Self: ... def collect( - self, backend: EagerImplementation | None, **kwargs: Any + self, backend: _EagerAllowedImpl | None, **kwargs: Any ) -> CompliantDataFrameAny: ... def collect_schema(self) -> Mapping[str, DType]: ... def drop(self, columns: Sequence[str], *, strict: bool) -> Self: ... @@ -198,7 +197,7 @@ def join_asof( strategy: AsofJoinStrategy, suffix: str, ) -> Self: ... - def lazy(self, backend: LazyImplementation | None) -> CompliantLazyFrameAny: ... + def lazy(self, backend: _LazyAllowedImpl | None) -> CompliantLazyFrameAny: ... def pivot( self, on: Sequence[str], @@ -300,7 +299,7 @@ def columns(self) -> Sequence[str]: ... def schema(self) -> Mapping[str, DType]: ... def _iter_columns(self) -> Iterator[Any]: ... def collect( - self, backend: EagerImplementation | None, **kwargs: Any + self, backend: _EagerAllowedImpl | None, **kwargs: Any ) -> CompliantDataFrameAny: ... def collect_schema(self) -> Mapping[str, DType]: ... def drop(self, columns: Sequence[str], *, strict: bool) -> Self: ... diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index 2ae3593bc5..4fcdde9001 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -33,16 +33,12 @@ from narwhals._dask.expr import DaskExpr from narwhals._dask.group_by import DaskLazyGroupBy from narwhals._dask.namespace import DaskNamespace + from narwhals._typing import _EagerAllowedImpl from narwhals._utils import Version, _LimitedContext from narwhals.dataframe import LazyFrame from narwhals.dtypes import DType from narwhals.exceptions import ColumnNotFoundError - from narwhals.typing import ( - AsofJoinStrategy, - EagerImplementation, - JoinStrategy, - LazyUniqueKeepStrategy, - ) + from narwhals.typing import AsofJoinStrategy, JoinStrategy, LazyUniqueKeepStrategy Incomplete: TypeAlias = "Any" """Using `_pandas_like` utils with `_dask`. @@ -116,7 +112,7 @@ def with_columns(self, *exprs: DaskExpr) -> Self: return self._with_native(self.native.assign(**dict(new_series))) def collect( - self, backend: EagerImplementation | None, **kwargs: Any + self, backend: _EagerAllowedImpl | None, **kwargs: Any ) -> CompliantDataFrameAny: result = self.native.compute(**kwargs) diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py index 08ce4fcb4c..eed7fd62c1 100644 --- a/narwhals/_duckdb/dataframe.py +++ b/narwhals/_duckdb/dataframe.py @@ -47,17 +47,12 @@ from narwhals._duckdb.group_by import DuckDBGroupBy from narwhals._duckdb.namespace import DuckDBNamespace from narwhals._duckdb.series import DuckDBInterchangeSeries + from narwhals._typing import _EagerAllowedImpl, _LazyAllowedImpl from narwhals._utils import _LimitedContext from narwhals.dataframe import LazyFrame from narwhals.dtypes import DType from narwhals.stable.v1 import DataFrame as DataFrameV1 - from narwhals.typing import ( - AsofJoinStrategy, - EagerImplementation, - JoinStrategy, - LazyImplementation, - LazyUniqueKeepStrategy, - ) + from narwhals.typing import AsofJoinStrategy, JoinStrategy, LazyUniqueKeepStrategy class DuckDBLazyFrame( @@ -135,7 +130,7 @@ def _iter_columns(self) -> Iterator[Expression]: yield col(name) def collect( - self, backend: EagerImplementation | None, **kwargs: Any + self, backend: _EagerAllowedImpl | None, **kwargs: Any ) -> CompliantDataFrameAny: if backend is None or backend is Implementation.PYARROW: from narwhals._arrow.dataframe import ArrowDataFrame @@ -193,7 +188,7 @@ def drop(self, columns: Sequence[str], *, strict: bool) -> Self: selection = (name for name in self.columns if name not in columns_to_drop) return self._with_native(self.native.select(*selection)) - def lazy(self, backend: LazyImplementation | None = None) -> Self: + def lazy(self, backend: _LazyAllowedImpl | None = None) -> Self: # The `backend`` argument has no effect but we keep it here for # backwards compatibility because in `narwhals.stable.v1` # function `.from_native()` will return a DataFrame for DuckDB. diff --git a/narwhals/_ibis/dataframe.py b/narwhals/_ibis/dataframe.py index 707b894f79..0b55054962 100644 --- a/narwhals/_ibis/dataframe.py +++ b/narwhals/_ibis/dataframe.py @@ -33,17 +33,12 @@ from narwhals._ibis.group_by import IbisGroupBy from narwhals._ibis.namespace import IbisNamespace from narwhals._ibis.series import IbisInterchangeSeries + from narwhals._typing import _EagerAllowedImpl, _LazyAllowedImpl from narwhals._utils import _LimitedContext from narwhals.dataframe import LazyFrame from narwhals.dtypes import DType from narwhals.stable.v1 import DataFrame as DataFrameV1 - from narwhals.typing import ( - AsofJoinStrategy, - EagerImplementation, - JoinStrategy, - LazyImplementation, - LazyUniqueKeepStrategy, - ) + from narwhals.typing import AsofJoinStrategy, JoinStrategy, LazyUniqueKeepStrategy JoinPredicates: TypeAlias = "Sequence[ir.BooleanColumn] | Sequence[str]" @@ -107,7 +102,7 @@ def _iter_columns(self) -> Iterator[ir.Expr]: yield self.native[name] def collect( - self, backend: EagerImplementation | None, **kwargs: Any + self, backend: _EagerAllowedImpl | None, **kwargs: Any ) -> CompliantDataFrameAny: if backend is None or backend is Implementation.PYARROW: from narwhals._arrow.dataframe import ArrowDataFrame @@ -169,7 +164,7 @@ def drop(self, columns: Sequence[str], *, strict: bool) -> Self: selection = (col for col in self.columns if col not in columns_to_drop) return self._with_native(self.native.select(*selection)) - def lazy(self, backend: LazyImplementation | None = None) -> Self: + def lazy(self, backend: _LazyAllowedImpl | None = None) -> Self: # The `backend`` argument has no effect but we keep it here for # backwards compatibility because in `narwhals.stable.v1` # function `.from_native()` will return a DataFrame for Ibis. diff --git a/narwhals/_namespace.py b/narwhals/_namespace.py index 6c0c913d43..4f937eb7ef 100644 --- a/narwhals/_namespace.py +++ b/narwhals/_namespace.py @@ -49,34 +49,25 @@ from narwhals._polars.namespace import PolarsNamespace from narwhals._spark_like.dataframe import SQLFrameDataFrame from narwhals._spark_like.namespace import SparkLikeNamespace - from narwhals.typing import ( + from narwhals._typing import ( Arrow, + Backend, Dask, - DataFrameLike, DuckDB, EagerAllowed, Ibis, IntoBackend, - NativeFrame, - NativeLazyFrame, - NativeSeries, PandasLike, Polars, SparkLike, ) + from narwhals.typing import DataFrameLike, NativeFrame, NativeLazyFrame, NativeSeries T = TypeVar("T") _Guard: TypeAlias = "Callable[[Any], TypeIs[T]]" EagerAllowedNamespace: TypeAlias = "Namespace[PandasLikeNamespace] | Namespace[ArrowNamespace] | Namespace[PolarsNamespace]" - EagerAllowedImplementation: TypeAlias = Literal[ - Implementation.PANDAS, - Implementation.CUDF, - Implementation.MODIN, - Implementation.PYARROW, - Implementation.POLARS, - ] class _BasePandasLike(Sized, Protocol): index: Any @@ -214,12 +205,12 @@ def from_backend(cls, backend: EagerAllowed, /) -> EagerAllowedNamespace: ... @overload @classmethod def from_backend( - cls, backend: IntoBackend, / + cls, backend: IntoBackend[Backend], / ) -> Namespace[CompliantNamespaceAny]: ... @classmethod def from_backend( - cls: type[Namespace[Any]], backend: IntoBackend, / + cls: type[Namespace[Any]], backend: IntoBackend[Backend], / ) -> Namespace[Any]: """Instantiate from native namespace module, string, or Implementation. @@ -333,6 +324,7 @@ def from_native_object( def from_native_object( cls: type[Namespace[Any]], native: NativeAny, / ) -> Namespace[Any]: + impl: Backend if is_native_polars(native): impl = Implementation.POLARS elif is_native_pandas(native): diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 50c8f1c472..d37585d5b6 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -47,15 +47,14 @@ from narwhals._pandas_like.group_by import PandasLikeGroupBy from narwhals._pandas_like.namespace import PandasLikeNamespace from narwhals._translate import IntoArrowTable + from narwhals._typing import _EagerAllowedImpl, _LazyAllowedImpl from narwhals._utils import Version, _LimitedContext from narwhals.dtypes import DType from narwhals.typing import ( AsofJoinStrategy, DTypeBackend, - EagerImplementation, IntoSchema, JoinStrategy, - LazyImplementation, PivotAgg, SizedMultiIndexSelector, SizedMultiNameSelector, @@ -492,7 +491,7 @@ def sort(self, *by: str, descending: bool | Sequence[bool], nulls_last: bool) -> # --- convert --- def collect( - self, backend: EagerImplementation | None, **kwargs: Any + self, backend: _EagerAllowedImpl | None, **kwargs: Any ) -> CompliantDataFrameAny: if backend is None: return PandasLikeDataFrame( @@ -760,7 +759,7 @@ def unique( ) # --- lazy-only --- - def lazy(self, backend: LazyImplementation | None = None) -> CompliantLazyFrameAny: + def lazy(self, backend: _LazyAllowedImpl | None = None) -> CompliantLazyFrameAny: pandas_df = self.to_pandas() if backend is None: return self diff --git a/narwhals/_polars/dataframe.py b/narwhals/_polars/dataframe.py index 19442f0381..1adbf1196f 100644 --- a/narwhals/_polars/dataframe.py +++ b/narwhals/_polars/dataframe.py @@ -40,14 +40,13 @@ from narwhals._polars.expr import PolarsExpr from narwhals._polars.group_by import PolarsGroupBy, PolarsLazyGroupBy from narwhals._translate import IntoArrowTable + from narwhals._typing import _EagerAllowedImpl, _LazyAllowedImpl from narwhals._utils import Version, _LimitedContext from narwhals.dataframe import DataFrame, LazyFrame from narwhals.dtypes import DType from narwhals.typing import ( - EagerImplementation, IntoSchema, JoinStrategy, - LazyImplementation, MultiColSelector, MultiIndexSelector, PivotAgg, @@ -442,7 +441,7 @@ def iter_columns(self) -> Iterator[PolarsSeries]: for series in self.native.iter_columns(): yield PolarsSeries.from_native(series, context=self) - def lazy(self, backend: LazyImplementation | None = None) -> CompliantLazyFrameAny: + def lazy(self, backend: _LazyAllowedImpl | None = None) -> CompliantLazyFrameAny: if backend is None or backend is Implementation.POLARS: return PolarsLazyFrame.from_native(self.native.lazy(), context=self) if backend is Implementation.DUCKDB: @@ -589,7 +588,7 @@ def collect_schema(self) -> dict[str, DType]: raise catch_polars_exception(e) from None def collect( - self, backend: EagerImplementation | None, **kwargs: Any + self, backend: _EagerAllowedImpl | None, **kwargs: Any ) -> CompliantDataFrameAny: try: result = self.native.collect(**kwargs) diff --git a/narwhals/_spark_like/dataframe.py b/narwhals/_spark_like/dataframe.py index 4a6ad7345d..ca66a1ed0b 100644 --- a/narwhals/_spark_like/dataframe.py +++ b/narwhals/_spark_like/dataframe.py @@ -41,10 +41,11 @@ from narwhals._spark_like.expr import SparkLikeExpr from narwhals._spark_like.group_by import SparkLikeLazyGroupBy from narwhals._spark_like.namespace import SparkLikeNamespace + from narwhals._typing import _EagerAllowedImpl from narwhals._utils import Version, _LimitedContext from narwhals.dataframe import LazyFrame from narwhals.dtypes import DType - from narwhals.typing import EagerImplementation, JoinStrategy, LazyUniqueKeepStrategy + from narwhals.typing import JoinStrategy, LazyUniqueKeepStrategy SQLFrameDataFrame = BaseDataFrame[Any, Any, Any, Any, Any] @@ -198,7 +199,7 @@ def columns(self) -> list[str]: return self._cached_columns def _collect( - self, backend: EagerImplementation | None, **kwargs: Any + self, backend: _EagerAllowedImpl | None, **kwargs: Any ) -> CompliantDataFrameAny: if backend is Implementation.PANDAS: from narwhals._pandas_like.dataframe import PandasLikeDataFrame @@ -236,7 +237,7 @@ def _collect( raise ValueError(msg) # pragma: no cover def collect( - self, backend: EagerImplementation | None, **kwargs: Any + self, backend: _EagerAllowedImpl | None, **kwargs: Any ) -> CompliantDataFrameAny: if self._implementation.is_pyspark_connect(): try: diff --git a/narwhals/_typing.py b/narwhals/_typing.py new file mode 100644 index 0000000000..ddac8bd1ef --- /dev/null +++ b/narwhals/_typing.py @@ -0,0 +1,141 @@ +from __future__ import annotations + +from types import ModuleType +from typing import TYPE_CHECKING, Literal, Union + +from narwhals._typing_compat import TypeVar +from narwhals._utils import Implementation + +if TYPE_CHECKING: + from typing_extensions import TypeAlias + +# `str` aliases +_Polars: TypeAlias = Literal["polars"] +_Arrow: TypeAlias = Literal["pyarrow"] +_Dask: TypeAlias = Literal["dask"] +_DuckDB: TypeAlias = Literal["duckdb"] +_Pandas: TypeAlias = Literal["pandas"] +_Modin: TypeAlias = Literal["modin"] +_Cudf: TypeAlias = Literal["cudf"] +_PySpark: TypeAlias = Literal["pyspark"] +_SQLFrame: TypeAlias = Literal["sqlframe"] +_PySparkConnect: TypeAlias = Literal["pyspark[connect]"] +_Ibis: TypeAlias = Literal["ibis"] +_PandasLike: TypeAlias = Literal[_Pandas, _Cudf, _Modin] +_SparkLike: TypeAlias = Literal[_PySpark, _SQLFrame, _PySparkConnect] +_EagerOnly: TypeAlias = Literal[_PandasLike, _Arrow] +_EagerAllowed: TypeAlias = Literal[_Polars, _EagerOnly] +_LazyOnly: TypeAlias = Literal[_SparkLike, _Dask, _DuckDB, _Ibis] +_LazyAllowed: TypeAlias = Literal[_Polars, _LazyOnly] + +# `Implementation` aliases +_PandasImpl: TypeAlias = Literal[Implementation.PANDAS] +_ModinImpl: TypeAlias = Literal[Implementation.MODIN] +_CudfImpl: TypeAlias = Literal[Implementation.CUDF] +_PySparkImpl: TypeAlias = Literal[Implementation.PYSPARK] +_SQLFrameImpl: TypeAlias = Literal[Implementation.SQLFRAME] +_PySparkConnectImpl: TypeAlias = Literal[Implementation.PYSPARK_CONNECT] +_PolarsImpl: TypeAlias = Literal[Implementation.POLARS] +_ArrowImpl: TypeAlias = Literal[Implementation.PYARROW] +_DaskImpl: TypeAlias = Literal[Implementation.DASK] +_DuckDBImpl: TypeAlias = Literal[Implementation.DUCKDB] +_IbisImpl: TypeAlias = Literal[Implementation.IBIS] +_PandasLikeImpl: TypeAlias = Literal[_PandasImpl, _CudfImpl, _ModinImpl] +_SparkLikeImpl: TypeAlias = Literal[_PySparkImpl, _SQLFrameImpl, _PySparkConnectImpl] +_EagerOnlyImpl: TypeAlias = Literal[_PandasLikeImpl, _ArrowImpl] +_EagerAllowedImpl: TypeAlias = Literal[_EagerOnlyImpl, _PolarsImpl] # noqa: PYI047 +_LazyOnlyImpl: TypeAlias = Literal[_SparkLikeImpl, _DaskImpl, _DuckDBImpl, _IbisImpl] +_LazyAllowedImpl: TypeAlias = Literal[_LazyOnlyImpl, _PolarsImpl] # noqa: PYI047 + +# NOTE: Temporary aliases for gaps in `LazyFrame.collect`, `DataFrame.lazy`, see: +# - https://github.com/narwhals-dev/narwhals/pull/2971#discussion_r2277137003 +# - https://github.com/narwhals-dev/narwhals/pull/3002#issuecomment-3194267667 +_LazyFrameCollectImpl: TypeAlias = Literal[_PandasImpl, _PolarsImpl, _ArrowImpl] # noqa: PYI047 +_DataFrameLazyImpl: TypeAlias = Literal[_PolarsImpl, _DaskImpl, _DuckDBImpl, _IbisImpl] # noqa: PYI047 + +# `str | Implementation` aliases +Pandas: TypeAlias = Literal[_Pandas, _PandasImpl] +Cudf: TypeAlias = Literal[_Cudf, _CudfImpl] +Modin: TypeAlias = Literal[_Modin, _ModinImpl] +PySpark: TypeAlias = Literal[_PySpark, _PySparkImpl] +SQLFrame: TypeAlias = Literal[_SQLFrame, _SQLFrameImpl] +PySparkConnect: TypeAlias = Literal[_PySparkConnect, _PySparkConnectImpl] +Polars: TypeAlias = Literal[_Polars, _PolarsImpl] +Arrow: TypeAlias = Literal[_Arrow, _ArrowImpl] +Dask: TypeAlias = Literal[_Dask, _DaskImpl] +DuckDB: TypeAlias = Literal[_DuckDB, _DuckDBImpl] +Ibis: TypeAlias = Literal[_Ibis, _IbisImpl] +PandasLike: TypeAlias = Literal[_PandasLike, _PandasLikeImpl] +SparkLike: TypeAlias = Literal[_SparkLike, _SparkLikeImpl] +EagerOnly: TypeAlias = Literal[PandasLike, Arrow] +EagerAllowed: TypeAlias = Literal[EagerOnly, Polars] +LazyOnly: TypeAlias = Literal[SparkLike, Dask, DuckDB, Ibis] +LazyAllowed: TypeAlias = Literal[LazyOnly, Polars] + +BackendName: TypeAlias = Literal[_EagerAllowed, _LazyAllowed] +Backend: TypeAlias = Literal[EagerAllowed, LazyAllowed] + +BackendT = TypeVar("BackendT", bound=Backend) +IntoBackend: TypeAlias = Union[BackendT, ModuleType] +"""Anything that can be converted into a Narwhals Implementation. + +`backend` can be specified in three ways. + +Examples: + A string backend name, such as: `"pandas"`, `"pyarrow"`, `"modin"`, `"cudf"` + + >>> import pandas as pd + >>> import narwhals as nw + >>> + >>> data = {"c": [5, 2], "d": [1, 4]} + >>> nw.DataFrame.from_dict(data, backend="pandas") + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | c d | + | 0 5 1 | + | 1 2 4 | + └──────────────────┘ + + An Implementation, such as: `Implementation.POLARS`, `Implementation.DUCKDB`, `Implementation.PYSPARK` + + >>> import narwhals as nw + >>> nw.read_parquet("file.parquet", backend=nw.Implementation.PYARROW) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | pyarrow.Table | + | a: int64 | + | b: int64 | + | ---- | + | a: [[1,2]] | + | b: [[4,5]] | + └──────────────────┘ + + A python module, such as `dask`, `ibis`, `sqlframe` + + >>> import numpy as np + >>> import polars as pl + >>> import narwhals as nw + >>> + >>> arr = np.arange(5, 10) + >>> nw.Series.from_numpy("arr", arr, dtype=nw.Int8, backend=pl) + ┌──────────────────┐ + | Narwhals Series | + |------------------| + |shape: (5,) | + |Series: 'arr' [i8]| + |[ | + | 5 | + | 6 | + | 7 | + | 8 | + | 9 | + |] | + └──────────────────┘ +""" + + +IntoBackendAny: TypeAlias = IntoBackend[Backend] +IntoBackendEager: TypeAlias = IntoBackend[EagerAllowed] +IntoBackendLazy: TypeAlias = IntoBackend[LazyAllowed] diff --git a/narwhals/_utils.py b/narwhals/_utils.py index 7332c387f0..0a88a9f3c3 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -73,8 +73,15 @@ NativeSeriesT_co, ) from narwhals._compliant.typing import EvalNames - from narwhals._namespace import EagerAllowedImplementation, Namespace + from narwhals._namespace import Namespace from narwhals._translate import ArrowStreamExportable, IntoArrowTable, ToNarwhalsT_co + from narwhals._typing import ( + Backend, + IntoBackend, + _DataFrameLazyImpl, + _EagerAllowedImpl, + _LazyFrameCollectImpl, + ) from narwhals.dataframe import DataFrame, LazyFrame from narwhals.dtypes import DType from narwhals.series import Series @@ -84,7 +91,6 @@ CompliantSeries, DataFrameLike, DTypes, - IntoBackend, IntoSeriesT, MultiIndexSelector, SingleIndexSelector, @@ -357,7 +363,7 @@ def from_string(cls: type[Self], backend_name: str) -> Implementation: @classmethod def from_backend( - cls: type[Self], backend: UnknownBackendName | IntoBackend + cls: type[Self], backend: IntoBackend[Backend] | UnknownBackendName ) -> Implementation: """Instantiate from native namespace module, string, or Implementation. @@ -1599,8 +1605,9 @@ def is_compliant_expr( return hasattr(obj, "__narwhals_expr__") -def is_eager_allowed(obj: Implementation) -> TypeIs[EagerAllowedImplementation]: - return obj in { +def is_eager_allowed(impl: Implementation, /) -> TypeIs[_EagerAllowedImpl]: + """Return True if `impl` allows eager operations.""" + return impl in { Implementation.PANDAS, Implementation.MODIN, Implementation.CUDF, @@ -1609,20 +1616,14 @@ def is_eager_allowed(obj: Implementation) -> TypeIs[EagerAllowedImplementation]: } -_CanCollectInto: TypeAlias = Literal[ - Implementation.PANDAS, Implementation.POLARS, Implementation.PYARROW -] -_CanLazyInto: TypeAlias = Literal[ - Implementation.DASK, Implementation.DUCKDB, Implementation.POLARS, Implementation.IBIS -] - - -def can_collect_into(obj: Implementation) -> TypeIs[_CanCollectInto]: - return obj in {Implementation.PANDAS, Implementation.POLARS, Implementation.PYARROW} +def can_lazyframe_collect(impl: Implementation, /) -> TypeIs[_LazyFrameCollectImpl]: + """Return True if `LazyFrame.collect(impl)` is allowed.""" + return impl in {Implementation.PANDAS, Implementation.POLARS, Implementation.PYARROW} -def can_lazy_into(obj: Implementation) -> TypeIs[_CanLazyInto]: - return obj in { +def can_dataframe_lazy(impl: Implementation, /) -> TypeIs[_DataFrameLazyImpl]: + """Return True if `DataFrame.lazy(impl)` is allowed.""" + return impl in { Implementation.DASK, Implementation.DUCKDB, Implementation.POLARS, diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 5549ba2ff7..f2c6133853 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -22,13 +22,12 @@ check_expressions_preserve_length, is_scalar_like, ) +from narwhals._typing import Arrow, Pandas, _DataFrameLazyImpl, _LazyFrameCollectImpl from narwhals._utils import ( Implementation, Version, - _CanCollectInto, - _CanLazyInto, - can_collect_into, - can_lazy_into, + can_dataframe_lazy, + can_lazyframe_collect, check_columns_exist, flatten, generate_repr, @@ -72,14 +71,13 @@ from narwhals._compliant import CompliantDataFrame, CompliantLazyFrame from narwhals._compliant.typing import CompliantExprAny, EagerNamespaceAny from narwhals._translate import IntoArrowTable + from narwhals._typing import Dask, DuckDB, EagerAllowed, Ibis, IntoBackend, Polars from narwhals.group_by import GroupBy, LazyGroupBy from narwhals.typing import ( AsofJoinStrategy, IntoDataFrame, - IntoEagerBackend, IntoExpr, IntoFrame, - IntoLazyBackend, IntoSchema, JoinStrategy, LazyUniqueKeepStrategy, @@ -480,7 +478,7 @@ def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> @classmethod def from_arrow( - cls, native_frame: IntoArrowTable, *, backend: IntoEagerBackend + cls, native_frame: IntoArrowTable, *, backend: IntoBackend[EagerAllowed] ) -> DataFrame[Any]: """Construct a DataFrame from an object which supports the PyCapsule Interface. @@ -537,7 +535,7 @@ def from_dict( data: Mapping[str, Any], schema: IntoSchema | None = None, *, - backend: IntoEagerBackend | None = None, + backend: IntoBackend[EagerAllowed] | None = None, ) -> DataFrame[Any]: """Instantiate DataFrame from dictionary. @@ -596,7 +594,7 @@ def from_numpy( data: _2DArray, schema: IntoSchema | Sequence[str] | None = None, *, - backend: IntoEagerBackend, + backend: IntoBackend[EagerAllowed], ) -> DataFrame[Any]: """Construct a DataFrame from a NumPy ndarray. @@ -716,7 +714,9 @@ def __arrow_c_stream__(self, requested_schema: object | None = None) -> object: pa_table = self.to_arrow() return pa_table.__arrow_c_stream__(requested_schema=requested_schema) # type: ignore[no-untyped-call] - def lazy(self, backend: IntoLazyBackend | None = None) -> LazyFrame[Any]: + def lazy( + self, backend: IntoBackend[Polars | DuckDB | Ibis | Dask] | None = None + ) -> LazyFrame[Any]: """Restrict available API methods to lazy-only ones. If `backend` is specified, then a conversion between different backends @@ -776,9 +776,9 @@ def lazy(self, backend: IntoLazyBackend | None = None) -> LazyFrame[Any]: if backend is None: return self._lazyframe(lazy(None), level="lazy") lazy_backend = Implementation.from_backend(backend) - if can_lazy_into(lazy_backend): + if can_dataframe_lazy(lazy_backend): return self._lazyframe(lazy(lazy_backend), level="lazy") - msg = f"Not-supported backend.\n\nExpected one of {get_args(_CanLazyInto)} or `None`, got {lazy_backend}" + msg = f"Not-supported backend.\n\nExpected one of {get_args(_DataFrameLazyImpl)} or `None`, got {lazy_backend}" raise ValueError(msg) def to_native(self) -> DataFrameT: @@ -2315,7 +2315,7 @@ def __getitem__(self, item: str | slice) -> NoReturn: raise TypeError(msg) def collect( - self, backend: IntoEagerBackend | None = None, **kwargs: Any + self, backend: IntoBackend[Polars | Pandas | Arrow] | None = None, **kwargs: Any ) -> DataFrame[Any]: r"""Materialize this LazyFrame into a DataFrame. @@ -2378,9 +2378,9 @@ def collect( if backend is None: return self._dataframe(collect(None, **kwargs), level="full") eager_backend = Implementation.from_backend(backend) - if can_collect_into(eager_backend): + if can_lazyframe_collect(eager_backend): return self._dataframe(collect(eager_backend, **kwargs), level="full") - msg = f"Unsupported `backend` value.\nExpected one of {get_args(_CanCollectInto)} or None, got: {eager_backend}." + msg = f"Unsupported `backend` value.\nExpected one of {get_args(_LazyFrameCollectImpl)} or None, got: {eager_backend}." raise ValueError(msg) def to_native(self) -> FrameT: diff --git a/narwhals/functions.py b/narwhals/functions.py index a042239f24..0e2e3c430d 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -43,13 +43,12 @@ from narwhals._compliant import CompliantExpr, CompliantNamespace from narwhals._translate import IntoArrowTable + from narwhals._typing import Backend, EagerAllowed, IntoBackend from narwhals.dataframe import DataFrame, LazyFrame from narwhals.typing import ( ConcatMethod, FrameT, - IntoBackend, IntoDType, - IntoEagerBackend, IntoExpr, IntoSchema, NativeFrame, @@ -169,7 +168,11 @@ def concat(items: Iterable[FrameT], *, how: ConcatMethod = "vertical") -> FrameT def new_series( - name: str, values: Any, dtype: IntoDType | None = None, *, backend: IntoEagerBackend + name: str, + values: Any, + dtype: IntoDType | None = None, + *, + backend: IntoBackend[EagerAllowed], ) -> Series[Any]: """Instantiate Narwhals Series from iterable (e.g. list or array). @@ -210,7 +213,11 @@ def new_series( def _new_series_impl( - name: str, values: Any, dtype: IntoDType | None = None, *, backend: IntoEagerBackend + name: str, + values: Any, + dtype: IntoDType | None = None, + *, + backend: IntoBackend[EagerAllowed], ) -> Series[Any]: implementation = Implementation.from_backend(backend) if is_eager_allowed(implementation): @@ -240,7 +247,7 @@ def from_dict( data: Mapping[str, Any], schema: IntoSchema | None = None, *, - backend: IntoEagerBackend | None = None, + backend: IntoBackend[EagerAllowed] | None = None, native_namespace: ModuleType | None = None, # noqa: ARG001 ) -> DataFrame[Any]: """Instantiate DataFrame from dictionary. @@ -325,7 +332,7 @@ def from_numpy( data: _2DArray, schema: IntoSchema | Sequence[str] | None = None, *, - backend: IntoEagerBackend, + backend: IntoBackend[EagerAllowed], ) -> DataFrame[Any]: """Construct a DataFrame from a NumPy ndarray. @@ -412,7 +419,7 @@ def _is_into_schema(obj: Any) -> TypeIs[_IntoSchema]: def from_arrow( - native_frame: IntoArrowTable, *, backend: IntoEagerBackend + native_frame: IntoArrowTable, *, backend: IntoBackend[EagerAllowed] ) -> DataFrame[Any]: # pragma: no cover """Construct a DataFrame from an object which supports the PyCapsule Interface. @@ -553,7 +560,9 @@ def show_versions() -> None: print(f"{k:>13}: {stat}") # noqa: T201 -def read_csv(source: str, *, backend: IntoEagerBackend, **kwargs: Any) -> DataFrame[Any]: +def read_csv( + source: str, *, backend: IntoBackend[EagerAllowed], **kwargs: Any +) -> DataFrame[Any]: """Read a CSV file into a DataFrame. Arguments: @@ -621,7 +630,9 @@ def read_csv(source: str, *, backend: IntoEagerBackend, **kwargs: Any) -> DataFr return from_native(native_frame, eager_only=True) -def scan_csv(source: str, *, backend: IntoBackend, **kwargs: Any) -> LazyFrame[Any]: +def scan_csv( + source: str, *, backend: IntoBackend[Backend], **kwargs: Any +) -> LazyFrame[Any]: """Lazily read from a CSV file. For the libraries that do not support lazy dataframes, the function reads @@ -701,7 +712,7 @@ def scan_csv(source: str, *, backend: IntoBackend, **kwargs: Any) -> LazyFrame[A def read_parquet( - source: str, *, backend: IntoEagerBackend, **kwargs: Any + source: str, *, backend: IntoBackend[EagerAllowed], **kwargs: Any ) -> DataFrame[Any]: """Read into a DataFrame from a parquet file. @@ -775,7 +786,9 @@ def read_parquet( return from_native(native_frame, eager_only=True) -def scan_parquet(source: str, *, backend: IntoBackend, **kwargs: Any) -> LazyFrame[Any]: +def scan_parquet( + source: str, *, backend: IntoBackend[Backend], **kwargs: Any +) -> LazyFrame[Any]: """Lazily read from a parquet file. For the libraries that do not support lazy dataframes, the function reads diff --git a/narwhals/series.py b/narwhals/series.py index 7db9891ef3..b9ad3573de 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -36,13 +36,13 @@ from typing_extensions import Self from narwhals._compliant import CompliantSeries + from narwhals._typing import EagerAllowed, IntoBackend from narwhals.dataframe import DataFrame, MultiIndexSelector from narwhals.dtypes import DType from narwhals.typing import ( ClosedInterval, FillNullStrategy, IntoDType, - IntoEagerBackend, NonNestedLiteral, NumericLiteral, RankMethod, @@ -101,7 +101,7 @@ def from_numpy( values: _1DArray, dtype: IntoDType | None = None, *, - backend: IntoEagerBackend, + backend: IntoBackend[EagerAllowed], ) -> Series[Any]: """Construct a Series from a NumPy ndarray. @@ -166,7 +166,7 @@ def from_iterable( values: Iterable[Any], dtype: IntoDType | None = None, *, - backend: IntoEagerBackend, + backend: IntoBackend[EagerAllowed], ) -> Series[Any]: """Construct a Series from an iterable. diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index d454a0333c..0e70fe2f9b 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -69,15 +69,23 @@ from typing_extensions import ParamSpec, Self from narwhals._translate import IntoArrowTable + from narwhals._typing import ( + Arrow, + Backend, + Dask, + DuckDB, + EagerAllowed, + Ibis, + IntoBackend, + Pandas, + Polars, + ) from narwhals.dataframe import MultiColSelector, MultiIndexSelector from narwhals.dtypes import DType from narwhals.typing import ( - IntoBackend, IntoDType, - IntoEagerBackend, IntoExpr, IntoFrame, - IntoLazyBackend, IntoLazyFrameT, IntoSeries, NonNestedLiteral, @@ -110,7 +118,7 @@ def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> @classmethod def from_arrow( - cls, native_frame: IntoArrowTable, *, backend: IntoEagerBackend + cls, native_frame: IntoArrowTable, *, backend: IntoBackend[EagerAllowed] ) -> DataFrame[Any]: result = super().from_arrow(native_frame, backend=backend) return cast("DataFrame[Any]", result) @@ -121,7 +129,7 @@ def from_dict( data: Mapping[str, Any], schema: Mapping[str, DType] | Schema | None = None, *, - backend: IntoEagerBackend | None = None, + backend: IntoBackend[EagerAllowed] | None = None, ) -> DataFrame[Any]: result = super().from_dict(data, schema, backend=backend) return cast("DataFrame[Any]", result) @@ -132,7 +140,7 @@ def from_numpy( data: _2DArray, schema: Mapping[str, DType] | Schema | Sequence[str] | None = None, *, - backend: IntoEagerBackend, + backend: IntoBackend[EagerAllowed], ) -> DataFrame[Any]: result = super().from_numpy(data, schema, backend=backend) return cast("DataFrame[Any]", result) @@ -184,7 +192,9 @@ def get_column(self, name: str) -> Series: # However the return type actually is `nw.v1.stable.Series`, check `tests/v1_test.py`. return super().get_column(name) # type: ignore[return-value] - def lazy(self, backend: IntoLazyBackend | None = None) -> LazyFrame[Any]: + def lazy( + self, backend: IntoBackend[Polars | DuckDB | Ibis | Dask] | None = None + ) -> LazyFrame[Any]: return _stableify(super().lazy(backend=backend)) @overload # type: ignore[override] @@ -252,7 +262,7 @@ def _extract_compliant(self, arg: Any) -> Any: raise InvalidIntoExprError.from_invalid_type(type(arg)) def collect( - self, backend: IntoEagerBackend | None = None, **kwargs: Any + self, backend: IntoBackend[Polars | Pandas | Arrow] | None = None, **kwargs: Any ) -> DataFrame[Any]: return _stableify(super().collect(backend=backend, **kwargs)) @@ -307,7 +317,7 @@ def from_numpy( values: _1DArray, dtype: IntoDType | None = None, *, - backend: IntoEagerBackend, + backend: IntoBackend[EagerAllowed], ) -> Series[Any]: result = super().from_numpy(name, values, dtype, backend=backend) return cast("Series[Any]", result) @@ -319,7 +329,7 @@ def from_iterable( values: Iterable[Any], dtype: IntoDType | None = None, *, - backend: IntoEagerBackend, + backend: IntoBackend[EagerAllowed], ) -> Series[Any]: result = super().from_iterable(name, values, dtype, backend=backend) return cast("Series[Any]", result) @@ -1190,7 +1200,7 @@ def new_series( values: Any, dtype: IntoDType | None = None, *, - backend: IntoEagerBackend | None = None, + backend: IntoBackend[EagerAllowed] | None = None, native_namespace: ModuleType | None = None, # noqa: ARG001 ) -> Series[Any]: """Instantiate Narwhals Series from iterable (e.g. list or array). @@ -1199,7 +1209,7 @@ def new_series( an is the same as `backend` but only accepts module types - for new code, we recommend using `backend`, as that's available beyond just `narwhals.stable.v1`. """ - backend = cast("IntoEagerBackend", backend) + backend = cast("IntoBackend[EagerAllowed]", backend) return _stableify(_new_series_impl(name, values, dtype, backend=backend)) @@ -1207,7 +1217,7 @@ def new_series( def from_arrow( native_frame: IntoArrowTable, *, - backend: IntoEagerBackend | None = None, + backend: IntoBackend[EagerAllowed] | None = None, native_namespace: ModuleType | None = None, # noqa: ARG001 ) -> DataFrame[Any]: """Construct a DataFrame from an object which supports the PyCapsule Interface. @@ -1216,7 +1226,7 @@ def from_arrow( an is the same as `backend` but only accepts module types - for new code, we recommend using `backend`, as that's available beyond just `narwhals.stable.v1`. """ - backend = cast("IntoEagerBackend", backend) + backend = cast("IntoBackend[EagerAllowed]", backend) return _stableify(nw_f.from_arrow(native_frame, backend=backend)) @@ -1225,7 +1235,7 @@ def from_dict( data: Mapping[str, Any], schema: Mapping[str, DType] | Schema | None = None, *, - backend: IntoEagerBackend | None = None, + backend: IntoBackend[EagerAllowed] | None = None, native_namespace: ModuleType | None = None, # noqa: ARG001 ) -> DataFrame[Any]: """Instantiate DataFrame from dictionary. @@ -1242,7 +1252,7 @@ def from_numpy( data: _2DArray, schema: Mapping[str, DType] | Schema | Sequence[str] | None = None, *, - backend: IntoEagerBackend | None = None, + backend: IntoBackend[EagerAllowed] | None = None, native_namespace: ModuleType | None = None, # noqa: ARG001 ) -> DataFrame[Any]: """Construct a DataFrame from a NumPy ndarray. @@ -1251,7 +1261,7 @@ def from_numpy( an is the same as `backend` but only accepts module types - for new code, we recommend using `backend`, as that's available beyond just `narwhals.stable.v1`. """ - backend = cast("IntoEagerBackend", backend) + backend = cast("IntoBackend[EagerAllowed]", backend) return _stableify(nw_f.from_numpy(data, schema, backend=backend)) @@ -1259,7 +1269,7 @@ def from_numpy( def read_csv( source: str, *, - backend: IntoEagerBackend | None = None, + backend: IntoBackend[EagerAllowed] | None = None, native_namespace: ModuleType | None = None, # noqa: ARG001 **kwargs: Any, ) -> DataFrame[Any]: @@ -1269,7 +1279,7 @@ def read_csv( an is the same as `backend` but only accepts module types - for new code, we recommend using `backend`, as that's available beyond just `narwhals.stable.v1`. """ - backend = cast("IntoEagerBackend", backend) + backend = cast("IntoBackend[EagerAllowed]", backend) return _stableify(nw_f.read_csv(source, backend=backend, **kwargs)) @@ -1277,7 +1287,7 @@ def read_csv( def scan_csv( source: str, *, - backend: IntoBackend | None = None, + backend: IntoBackend[Backend] | None = None, native_namespace: ModuleType | None = None, # noqa: ARG001 **kwargs: Any, ) -> LazyFrame[Any]: @@ -1287,7 +1297,7 @@ def scan_csv( an is the same as `backend` but only accepts module types - for new code, we recommend using `backend`, as that's available beyond just `narwhals.stable.v1`. """ - backend = cast("IntoBackend", backend) + backend = cast("IntoBackend[Backend]", backend) return _stableify(nw_f.scan_csv(source, backend=backend, **kwargs)) @@ -1295,7 +1305,7 @@ def scan_csv( def read_parquet( source: str, *, - backend: IntoEagerBackend | None = None, + backend: IntoBackend[EagerAllowed] | None = None, native_namespace: ModuleType | None = None, # noqa: ARG001 **kwargs: Any, ) -> DataFrame[Any]: @@ -1305,7 +1315,7 @@ def read_parquet( an is the same as `backend` but only accepts module types - for new code, we recommend using `backend`, as that's available beyond just `narwhals.stable.v1`. """ - backend = cast("IntoEagerBackend", backend) + backend = cast("IntoBackend[EagerAllowed]", backend) return _stableify(nw_f.read_parquet(source, backend=backend, **kwargs)) @@ -1313,7 +1323,7 @@ def read_parquet( def scan_parquet( source: str, *, - backend: IntoBackend | None = None, + backend: IntoBackend[Backend] | None = None, native_namespace: ModuleType | None = None, # noqa: ARG001 **kwargs: Any, ) -> LazyFrame[Any]: @@ -1323,7 +1333,7 @@ def scan_parquet( an is the same as `backend` but only accepts module types - for new code, we recommend using `backend`, as that's available beyond just `narwhals.stable.v1`. """ - backend = cast("IntoBackend", backend) + backend = cast("IntoBackend[Backend]", backend) return _stableify(nw_f.scan_parquet(source, backend=backend, **kwargs)) diff --git a/narwhals/stable/v2/__init__.py b/narwhals/stable/v2/__init__.py index 9659c7076a..e0bd6ffa2b 100644 --- a/narwhals/stable/v2/__init__.py +++ b/narwhals/stable/v2/__init__.py @@ -64,15 +64,23 @@ from typing_extensions import ParamSpec, Self from narwhals._translate import IntoArrowTable + from narwhals._typing import ( + Arrow, + Backend, + Dask, + DuckDB, + EagerAllowed, + Ibis, + IntoBackend, + Pandas, + Polars, + ) from narwhals.dataframe import MultiColSelector, MultiIndexSelector from narwhals.dtypes import DType from narwhals.typing import ( - IntoBackend, IntoDType, - IntoEagerBackend, IntoExpr, IntoFrame, - IntoLazyBackend, IntoSeries, NonNestedLiteral, SingleColSelector, @@ -147,7 +155,9 @@ def get_column(self, name: str) -> Series: # However the return type actually is `nw.v2.stable.Series`, check `tests/v2_test.py`. return super().get_column(name) # type: ignore[return-value] - def lazy(self, backend: IntoLazyBackend | None = None) -> LazyFrame[Any]: + def lazy( + self, backend: IntoBackend[Polars | DuckDB | Ibis | Dask] | None = None + ) -> LazyFrame[Any]: return _stableify(super().lazy(backend=backend)) @overload # type: ignore[override] @@ -183,7 +193,7 @@ def _dataframe(self) -> type[DataFrame[Any]]: return DataFrame def collect( - self, backend: IntoEagerBackend | None = None, **kwargs: Any + self, backend: IntoBackend[Polars | Pandas | Arrow] | None = None, **kwargs: Any ) -> DataFrame[Any]: return _stableify(super().collect(backend=backend, **kwargs)) @@ -214,7 +224,7 @@ def from_iterable( values: Iterable[Any], dtype: IntoDType | None = None, *, - backend: IntoEagerBackend, + backend: IntoBackend[EagerAllowed], ) -> Series[Any]: result = super().from_iterable(name, values, dtype, backend=backend) return cast("Series[Any]", result) @@ -953,7 +963,11 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: def new_series( - name: str, values: Any, dtype: IntoDType | None = None, *, backend: IntoEagerBackend + name: str, + values: Any, + dtype: IntoDType | None = None, + *, + backend: IntoBackend[EagerAllowed], ) -> Series[Any]: """Instantiate Narwhals Series from iterable (e.g. list or array). @@ -978,7 +992,7 @@ def new_series( def from_arrow( - native_frame: IntoArrowTable, *, backend: IntoEagerBackend + native_frame: IntoArrowTable, *, backend: IntoBackend[EagerAllowed] ) -> DataFrame[Any]: """Construct a DataFrame from an object which supports the PyCapsule Interface. @@ -1003,7 +1017,7 @@ def from_dict( data: Mapping[str, Any], schema: Mapping[str, DType] | Schema | None = None, *, - backend: IntoEagerBackend | None = None, + backend: IntoBackend[EagerAllowed] | None = None, ) -> DataFrame[Any]: """Instantiate DataFrame from dictionary. @@ -1038,7 +1052,7 @@ def from_numpy( data: _2DArray, schema: Mapping[str, DType] | Schema | Sequence[str] | None = None, *, - backend: IntoEagerBackend, + backend: IntoBackend[EagerAllowed], ) -> DataFrame[Any]: """Construct a DataFrame from a NumPy ndarray. @@ -1066,7 +1080,9 @@ def from_numpy( return _stableify(nw_f.from_numpy(data, schema, backend=backend)) -def read_csv(source: str, *, backend: IntoEagerBackend, **kwargs: Any) -> DataFrame[Any]: +def read_csv( + source: str, *, backend: IntoBackend[EagerAllowed], **kwargs: Any +) -> DataFrame[Any]: """Read a CSV file into a DataFrame. Arguments: @@ -1088,7 +1104,9 @@ def read_csv(source: str, *, backend: IntoEagerBackend, **kwargs: Any) -> DataFr return _stableify(nw_f.read_csv(source, backend=backend, **kwargs)) -def scan_csv(source: str, *, backend: IntoBackend, **kwargs: Any) -> LazyFrame[Any]: +def scan_csv( + source: str, *, backend: IntoBackend[Backend], **kwargs: Any +) -> LazyFrame[Any]: """Lazily read from a CSV file. For the libraries that do not support lazy dataframes, the function reads @@ -1114,7 +1132,7 @@ def scan_csv(source: str, *, backend: IntoBackend, **kwargs: Any) -> LazyFrame[A def read_parquet( - source: str, *, backend: IntoEagerBackend, **kwargs: Any + source: str, *, backend: IntoBackend[EagerAllowed], **kwargs: Any ) -> DataFrame[Any]: """Read into a DataFrame from a parquet file. @@ -1137,7 +1155,9 @@ def read_parquet( return _stableify(nw_f.read_parquet(source, backend=backend, **kwargs)) -def scan_parquet(source: str, *, backend: IntoBackend, **kwargs: Any) -> LazyFrame[Any]: +def scan_parquet( + source: str, *, backend: IntoBackend[Backend], **kwargs: Any +) -> LazyFrame[Any]: """Lazily read from a parquet file. For the libraries that do not support lazy dataframes, the function reads diff --git a/narwhals/typing.py b/narwhals/typing.py index 68ec2aef96..8014e2d09e 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING, Any, Literal, Protocol, TypeVar, Union from narwhals._compliant import CompliantDataFrame, CompliantLazyFrame, CompliantSeries +from narwhals._typing import IntoBackend as IntoBackend # noqa: PLC0414 if TYPE_CHECKING: import datetime as dt @@ -14,7 +15,6 @@ from typing_extensions import TypeAlias from narwhals import dtypes - from narwhals._utils import Implementation from narwhals.dataframe import DataFrame, LazyFrame from narwhals.expr import Expr from narwhals.schema import Schema @@ -100,73 +100,6 @@ def Time(self) -> type[dtypes.Time]: ... @property def Binary(self) -> type[dtypes.Binary]: ... - _Polars: TypeAlias = Literal["polars"] - _Arrow: TypeAlias = Literal["pyarrow"] - _Dask: TypeAlias = Literal["dask"] - _DuckDB: TypeAlias = Literal["duckdb"] - _Pandas: TypeAlias = Literal["pandas"] - _Modin: TypeAlias = Literal["modin"] - _Cudf: TypeAlias = Literal["cudf"] - _PySpark: TypeAlias = Literal["pyspark"] - _SQLFrame: TypeAlias = Literal["sqlframe"] - _PySparkConnect: TypeAlias = Literal["pyspark[connect]"] - _Ibis: TypeAlias = Literal["ibis"] - _PandasLike: TypeAlias = "_Pandas | _Cudf | _Modin" - _SparkLike: TypeAlias = "_PySpark | _SQLFrame | _PySparkConnect" - - _EagerOnly: TypeAlias = "_PandasLike | _Arrow" - _EagerAllowed: TypeAlias = "_Polars | _EagerOnly" - _LazyOnly: TypeAlias = "_SparkLike | _Dask | _DuckDB | _Ibis" - _LazyAllowed: TypeAlias = "_Polars | _LazyOnly" - - Pandas: TypeAlias = Literal[_Pandas, Implementation.PANDAS] - Cudf: TypeAlias = Literal[_Cudf, Implementation.CUDF] - Modin: TypeAlias = Literal[_Modin, Implementation.MODIN] - PySpark: TypeAlias = Literal[_PySpark, Implementation.PYSPARK] - SQLFrame: TypeAlias = Literal[_SQLFrame, Implementation.SQLFRAME] - PySparkConnect: TypeAlias = Literal[_PySparkConnect, Implementation.PYSPARK_CONNECT] - Polars: TypeAlias = Literal[_Polars, Implementation.POLARS] - Arrow: TypeAlias = Literal[_Arrow, Implementation.PYARROW] - Dask: TypeAlias = Literal[_Dask, Implementation.DASK] - DuckDB: TypeAlias = Literal[_DuckDB, Implementation.DUCKDB] - Ibis: TypeAlias = Literal[_Ibis, Implementation.IBIS] - - PandasLike: TypeAlias = Literal[ - _PandasLike, Implementation.PANDAS, Implementation.CUDF, Implementation.MODIN - ] - SparkLike: TypeAlias = Literal[ - _SparkLike, - Implementation.PYSPARK, - Implementation.SQLFRAME, - Implementation.PYSPARK_CONNECT, - ] - EagerOnly: TypeAlias = "PandasLike | Arrow" - EagerAllowed: TypeAlias = "EagerOnly | Polars" - LazyOnly: TypeAlias = "SparkLike | Dask | DuckDB | Ibis" - LazyAllowed: TypeAlias = "LazyOnly | Polars" - - EagerBackendName: TypeAlias = _EagerAllowed - EagerImplementation: TypeAlias = Literal[ - Implementation.CUDF, - Implementation.MODIN, - Implementation.PANDAS, - Implementation.POLARS, - Implementation.PYARROW, - ] - - LazyBackendName: TypeAlias = _LazyAllowed - LazyImplementation: TypeAlias = Literal[ - Implementation.DASK, - Implementation.DUCKDB, - Implementation.IBIS, - Implementation.POLARS, - Implementation.PYSPARK, - Implementation.PYSPARK_CONNECT, - Implementation.SQLFRAME, - ] - - BackendName: TypeAlias = "_EagerAllowed | _LazyAllowed" - IntoExpr: TypeAlias = Union["Expr", str, "Series[Any]"] """Anything which can be converted to an expression. @@ -459,117 +392,6 @@ def Binary(self) -> type[dtypes.Binary]: ... └──────────────────┘ """ -IntoEagerBackend: TypeAlias = "EagerBackendName | EagerImplementation | ModuleType" -"""Anything that can be converted into a Narwhals Implementation of an eager backend. - -It can be specified as: - -- a string (backend name): `"polars"`, `"pandas"`, `"pyarrow"`, `"modin"`, and `"cudf"`. -- an Implementation: `Implementation.POLARS`, `Implementation.PANDAS`, `Implementation.PYARROW`, etc.. -- a python module: `polars`, `pandas`, `pyarrow`, `modin`, and `cudf` - -Examples: - >>> import numpy as np - >>> import polars as pl - >>> import narwhals as nw - >>> - >>> arr = np.array([[5, 2, 1], [1, 4, 3]]) - >>> schema = {"c": nw.Int16(), "d": nw.Float32(), "e": nw.Int8()} - >>> nw.DataFrame.from_numpy(arr, schema=schema, backend="polars") - ┌───────────────────┐ - |Narwhals DataFrame | - |-------------------| - |shape: (2, 3) | - |┌─────┬─────┬─────┐| - |│ c ┆ d ┆ e │| - |│ --- ┆ --- ┆ --- │| - |│ i16 ┆ f32 ┆ i8 │| - |╞═════╪═════╪═════╡| - |│ 5 ┆ 2.0 ┆ 1 │| - |│ 1 ┆ 4.0 ┆ 3 │| - |└─────┴─────┴─────┘| - └───────────────────┘ -""" - -IntoLazyBackend: TypeAlias = "LazyBackendName | LazyImplementation | ModuleType" -"""Anything that can be converted into a Narwhals Implementation of an lazy backend. - -It can be specified as: - -- a string (backend name): `"dask"`, `"duckdb"`, `"ibis"`, `"polars"`, `"pyspark"`, etc... -- an Implementation: `Implementation.DUCKDB`, `Implementation.SQLFRAME`, etc.. -- a python module: `polars`, `pyspark.sql.connect`, `ibis`, `dask.dataframe`, etc... - - Examples: - >>> import polars as pl - >>> import narwhals as nw - >>> df_native = pl.DataFrame({"a": [1, 2], "b": [4, 6]}) - >>> df = nw.from_native(df_native) - - If we call `df.lazy`, we get a `narwhals.LazyFrame` backed by a Polars - LazyFrame. - - >>> df.lazy() # doctest: +SKIP - ┌─────────────────────────────┐ - | Narwhals LazyFrame | - |-----------------------------| - || - └─────────────────────────────┘ - - We can also pass DuckDB as the backend, and then we'll get a - `narwhals.LazyFrame` backed by a `duckdb.DuckDBPyRelation`. - - >>> df.lazy(backend=nw.Implementation.DUCKDB) - ┌──────────────────┐ - |Narwhals LazyFrame| - |------------------| - |┌───────┬───────┐ | - |│ a │ b │ | - |│ int64 │ int64 │ | - |├───────┼───────┤ | - |│ 1 │ 4 │ | - |│ 2 │ 6 │ | - |└───────┴───────┘ | - └──────────────────┘ -""" - - -IntoBackend: TypeAlias = "BackendName | Implementation | ModuleType" -"""Anything that can be converted into a Narwhals Implementation. - -It can be specified as: - -- a string (backend name), such as: `"pandas"`, `"pyarrow"`, `"modin"`, `"cudf"`, etc.. -- an Implementation, such as: `Implementation.POLARS`, `Implementation.DUCKDB`, `Implementation.PYSPARK`, etc.. -- a python module, such as `dask`, `ibis`, `sqlframe`, etc.. - -Examples: - >>> import dask.dataframe as dd - >>> from sqlframe.duckdb import DuckDBSession - >>> import narwhals as nw - >>> - >>> nw.scan_parquet("file.parquet", backend="dask").collect() # doctest:+SKIP - ┌──────────────────┐ - |Narwhals DataFrame| - |------------------| - | a b | - | 0 1 4 | - | 1 2 5 | - └──────────────────┘ - >>> nw.scan_parquet( - ... "file.parquet", backend=Implementation.SQLFRAME, session=DuckDBSession() - ... ).collect() # doctest:+SKIP - ┌──────────────────┐ - |Narwhals DataFrame| - |------------------| - | pyarrow.Table | - | a: int64 | - | b: int64 | - | ---- | - | a: [[1,2]] | - | b: [[4,5]] | - └──────────────────┘ -""" # TODO @dangotbanned: fix this? # Constructor allows tuples, but we don't support that *everywhere* yet diff --git a/tests/conftest.py b/tests/conftest.py index 0690c4c4e0..5f33b47eac 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,7 +25,8 @@ from typing_extensions import TypeAlias from narwhals._spark_like.dataframe import SQLFrameDataFrame - from narwhals.typing import EagerAllowed, NativeFrame, NativeLazyFrame + from narwhals._typing import EagerAllowed + from narwhals.typing import NativeFrame, NativeLazyFrame from tests.utils import Constructor, ConstructorEager, ConstructorLazy Data: TypeAlias = "dict[str, list[Any]]" diff --git a/tests/frame/collect_test.py b/tests/frame/collect_test.py index 9b25121999..3cc0973d19 100644 --- a/tests/frame/collect_test.py +++ b/tests/frame/collect_test.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Literal import pytest @@ -10,9 +10,7 @@ from tests.utils import POLARS_VERSION, Constructor, assert_equal_data if TYPE_CHECKING: - from types import ModuleType - - from narwhals.typing import IntoEagerBackend + from narwhals._typing import Arrow, Dask, IntoBackend, Modin, Pandas, Polars data = {"a": [1, 2], "b": [3, 4]} @@ -53,7 +51,7 @@ def test_collect_to_default_backend(constructor: Constructor) -> None: ) @pytest.mark.parametrize("backend", ["pandas", Implementation.PANDAS]) def test_collect_to_valid_backend_pandas( - constructor: Constructor, backend: IntoEagerBackend + constructor: Constructor, backend: Pandas ) -> None: pytest.importorskip("pandas") import pandas as pd @@ -68,7 +66,7 @@ def test_collect_to_valid_backend_pandas( ) @pytest.mark.parametrize("backend", ["polars", Implementation.POLARS]) def test_collect_to_valid_backend_polars( - constructor: Constructor, backend: IntoEagerBackend + constructor: Constructor, backend: Polars ) -> None: pytest.importorskip("polars") import polars as pl @@ -83,7 +81,7 @@ def test_collect_to_valid_backend_polars( ) @pytest.mark.parametrize("backend", ["pyarrow", Implementation.PYARROW]) def test_collect_to_valid_backend_pyarrow( - constructor: Constructor, backend: IntoEagerBackend + constructor: Constructor, backend: Arrow ) -> None: pytest.importorskip("pyarrow") import pyarrow as pa @@ -133,12 +131,12 @@ def test_collect_to_valid_backend_pyarrow_mod(constructor: Constructor) -> None: "backend", ["foo", Implementation.DASK, Implementation.MODIN, pytest] ) def test_collect_to_invalid_backend( - constructor: Constructor, backend: ModuleType | IntoEagerBackend + constructor: Constructor, backend: Literal["foo"] | IntoBackend[Modin | Dask] ) -> None: df = nw.from_native(constructor(data)) with pytest.raises(ValueError, match="Unsupported `backend` value"): - df.lazy().collect(backend=backend).to_native() + df.lazy().collect(backend=backend).to_native() # type: ignore[arg-type] def test_collect_with_kwargs(constructor: Constructor) -> None: diff --git a/tests/frame/from_arrow_test.py b/tests/frame/from_arrow_test.py index d4dda02f9f..4ec17836a7 100644 --- a/tests/frame/from_arrow_test.py +++ b/tests/frame/from_arrow_test.py @@ -13,7 +13,7 @@ from tests.utils import PYARROW_VERSION, assert_equal_data if TYPE_CHECKING: - from narwhals.typing import EagerAllowed + from narwhals._typing import EagerAllowed @pytest.fixture diff --git a/tests/frame/from_dict_test.py b/tests/frame/from_dict_test.py index 2f27c8ea68..2ff336642b 100644 --- a/tests/frame/from_dict_test.py +++ b/tests/frame/from_dict_test.py @@ -9,7 +9,7 @@ from tests.utils import Constructor, assert_equal_data if TYPE_CHECKING: - from narwhals.typing import EagerAllowed + from narwhals._typing import EagerAllowed, Polars def test_from_dict(eager_backend: EagerAllowed) -> None: @@ -28,9 +28,7 @@ def test_from_dict_schema(eager_backend: EagerAllowed) -> None: @pytest.mark.parametrize("backend", [Implementation.POLARS, "polars"]) -def test_from_dict_without_backend( - constructor: Constructor, backend: EagerAllowed -) -> None: +def test_from_dict_without_backend(constructor: Constructor, backend: Polars) -> None: pytest.importorskip("polars") df = ( @@ -56,7 +54,7 @@ def test_from_dict_with_backend_invalid() -> None: @pytest.mark.parametrize("backend", [Implementation.POLARS, "polars"]) def test_from_dict_one_native_one_narwhals( - constructor: Constructor, backend: EagerAllowed + constructor: Constructor, backend: Polars ) -> None: pytest.importorskip("polars") diff --git a/tests/frame/from_numpy_test.py b/tests/frame/from_numpy_test.py index abcb7a92dd..d586912d2e 100644 --- a/tests/frame/from_numpy_test.py +++ b/tests/frame/from_numpy_test.py @@ -11,7 +11,8 @@ from tests.utils import assert_equal_data if TYPE_CHECKING: - from narwhals.typing import EagerAllowed, _2DArray + from narwhals._typing import EagerAllowed + from narwhals.typing import _2DArray arr: _2DArray = cast("_2DArray", np.array([[5, 2, 0, 1], [1, 4, 7, 8], [1, 2, 3, 9]])) diff --git a/tests/frame/lazy_test.py b/tests/frame/lazy_test.py index 0a4546cd94..46eff6dfef 100644 --- a/tests/frame/lazy_test.py +++ b/tests/frame/lazy_test.py @@ -9,7 +9,7 @@ from narwhals.dependencies import get_cudf, get_modin if TYPE_CHECKING: - from narwhals.typing import IntoLazyBackend + from narwhals._typing import Dask, DuckDB, Ibis, Polars from tests.utils import ConstructorEager @@ -61,7 +61,7 @@ def test_lazy_to_default(constructor_eager: ConstructorEager) -> None: ], ) def test_lazy_backend( - constructor_eager: ConstructorEager, backend: IntoLazyBackend + constructor_eager: ConstructorEager, backend: Polars | DuckDB | Ibis | Dask ) -> None: implementation = Implementation.from_backend(backend) pytest.importorskip(implementation.name.lower()) @@ -75,3 +75,5 @@ def test_lazy_backend_invalid(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) with pytest.raises(ValueError, match="Not-supported backend"): df.lazy(backend=Implementation.PANDAS) # type: ignore[arg-type] + with pytest.raises(ValueError, match="Not-supported backend"): + df.lazy(backend="pyspark") # type: ignore[arg-type] diff --git a/tests/from_dict_test.py b/tests/from_dict_test.py index 24a169477f..3a201f757f 100644 --- a/tests/from_dict_test.py +++ b/tests/from_dict_test.py @@ -11,9 +11,9 @@ from tests.utils import Constructor, assert_equal_data if TYPE_CHECKING: - from narwhals.typing import IntoEagerBackend + from narwhals._typing import Arrow, Pandas, Polars -TEST_EAGER_BACKENDS: list[IntoEagerBackend] = [] +TEST_EAGER_BACKENDS: list[Polars | Pandas | Arrow] = [] TEST_EAGER_BACKENDS.extend( (Implementation.POLARS, "polars") if find_spec("polars") is not None else () ) @@ -26,7 +26,7 @@ @pytest.mark.parametrize("backend", TEST_EAGER_BACKENDS) -def test_from_dict(backend: IntoEagerBackend) -> None: +def test_from_dict(backend: Polars | Pandas | Arrow) -> None: result = nw.from_dict({"c": [1, 2], "d": [5, 6]}, backend=backend) expected = {"c": [1, 2], "d": [5, 6]} assert_equal_data(result, expected) @@ -34,7 +34,7 @@ def test_from_dict(backend: IntoEagerBackend) -> None: @pytest.mark.parametrize("backend", TEST_EAGER_BACKENDS) -def test_from_dict_schema(backend: IntoEagerBackend) -> None: +def test_from_dict_schema(backend: Polars | Pandas | Arrow) -> None: schema = {"c": nw.Int16(), "d": nw.Float32()} result = nw.from_dict({"c": [1, 2], "d": [5, 6]}, backend=backend, schema=schema) assert result.collect_schema() == schema @@ -48,9 +48,7 @@ def test_from_dict_schema(backend: IntoEagerBackend) -> None: @pytest.mark.parametrize("backend", [Implementation.POLARS, "polars"]) -def test_from_dict_without_backend( - constructor: Constructor, backend: IntoEagerBackend -) -> None: +def test_from_dict_without_backend(constructor: Constructor, backend: Polars) -> None: pytest.importorskip("polars") df = ( @@ -76,7 +74,7 @@ def test_from_dict_with_backend_invalid() -> None: @pytest.mark.parametrize("backend", [Implementation.POLARS, "polars"]) def test_from_dict_one_native_one_narwhals( - constructor: Constructor, backend: IntoEagerBackend + constructor: Constructor, backend: Polars ) -> None: pytest.importorskip("polars") @@ -91,13 +89,13 @@ def test_from_dict_one_native_one_narwhals( @pytest.mark.parametrize("backend", TEST_EAGER_BACKENDS) -def test_from_dict_empty(backend: IntoEagerBackend) -> None: +def test_from_dict_empty(backend: Polars | Pandas | Arrow) -> None: result = nw.from_dict({}, backend=backend) assert result.shape == (0, 0) @pytest.mark.parametrize("backend", TEST_EAGER_BACKENDS) -def test_from_dict_empty_with_schema(backend: IntoEagerBackend) -> None: +def test_from_dict_empty_with_schema(backend: Polars | Pandas | Arrow) -> None: schema = nw.Schema({"a": nw.String(), "b": nw.Int8()}) result = nw.from_dict({}, schema, backend=backend) assert result.schema == schema diff --git a/tests/namespace_test.py b/tests/namespace_test.py index 25a7d45dc1..b1eae1df66 100644 --- a/tests/namespace_test.py +++ b/tests/namespace_test.py @@ -20,7 +20,8 @@ from narwhals._pandas_like.expr import PandasLikeExpr from narwhals._pandas_like.namespace import PandasLikeNamespace # noqa: F401 from narwhals._polars.namespace import PolarsNamespace # noqa: F401 - from narwhals.typing import BackendName, _2DArray, _EagerAllowed + from narwhals._typing import BackendName, _EagerAllowed + from narwhals.typing import _2DArray from tests.utils import Constructor ExprT = TypeVar("ExprT", bound="CompliantExprAny") diff --git a/tests/read_scan_test.py b/tests/read_scan_test.py index 869c4b7296..2c243314e0 100644 --- a/tests/read_scan_test.py +++ b/tests/read_scan_test.py @@ -16,10 +16,10 @@ if TYPE_CHECKING: from collections.abc import Mapping - from narwhals.typing import IntoEagerBackend, _LazyOnly + from narwhals._typing import Arrow, Pandas, Polars, _LazyOnly data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]} -TEST_EAGER_BACKENDS = [ +TEST_EAGER_BACKENDS: list[Polars | Pandas | Arrow] = [ Implementation.POLARS, Implementation.PANDAS, Implementation.PYARROW, @@ -30,7 +30,9 @@ @pytest.mark.parametrize("backend", TEST_EAGER_BACKENDS) -def test_read_csv(tmpdir: pytest.TempdirFactory, backend: IntoEagerBackend) -> None: +def test_read_csv( + tmpdir: pytest.TempdirFactory, backend: Polars | Pandas | Arrow +) -> None: df_pl = pl.DataFrame(data) filepath = str(tmpdir / "file.csv") # type: ignore[operator] df_pl.write_csv(filepath) diff --git a/tests/series_only/from_iterable_test.py b/tests/series_only/from_iterable_test.py index 3ea824e10f..649ad368bd 100644 --- a/tests/series_only/from_iterable_test.py +++ b/tests/series_only/from_iterable_test.py @@ -23,7 +23,8 @@ from typing_extensions import TypeAlias - from narwhals.typing import EagerAllowed, IntoDType + from narwhals._typing import EagerAllowed + from narwhals.typing import IntoDType IntoIterable: TypeAlias = Callable[..., Iterable[Any]] diff --git a/tests/series_only/from_numpy_test.py b/tests/series_only/from_numpy_test.py index f6b1fa5f17..9696f742c6 100644 --- a/tests/series_only/from_numpy_test.py +++ b/tests/series_only/from_numpy_test.py @@ -14,8 +14,9 @@ if TYPE_CHECKING: from collections.abc import Sequence + from narwhals._typing import EagerAllowed from narwhals.dtypes import NestedType - from narwhals.typing import EagerAllowed, IntoDType, _1DArray + from narwhals.typing import IntoDType, _1DArray arr: _1DArray = cast("_1DArray", np.array([5, 2, 0, 1])) diff --git a/tests/v1_test.py b/tests/v1_test.py index 7fb174fc35..a0c054a20b 100644 --- a/tests/v1_test.py +++ b/tests/v1_test.py @@ -46,8 +46,9 @@ from typing_extensions import assert_type + from narwhals._typing import EagerAllowed from narwhals.stable.v1.typing import IntoDataFrameT - from narwhals.typing import EagerAllowed, IntoDType, _1DArray, _2DArray + from narwhals.typing import IntoDType, _1DArray, _2DArray from tests.utils import Constructor, ConstructorEager diff --git a/tests/v2_test.py b/tests/v2_test.py index a69ed03b6b..4e164fc731 100644 --- a/tests/v2_test.py +++ b/tests/v2_test.py @@ -24,8 +24,9 @@ from typing_extensions import assert_type + from narwhals._typing import EagerAllowed from narwhals.stable.v2.typing import IntoDataFrameT - from narwhals.typing import EagerAllowed, IntoDType + from narwhals.typing import IntoDType def test_toplevel() -> None: