diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index e85abbabc7..83e7104e1f 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -52,6 +52,8 @@ from narwhals._arrow.typing import Mask # type: ignore[attr-defined] from narwhals._arrow.typing import Order # type: ignore[attr-defined] from narwhals.dtypes import DType + from narwhals.typing import CompliantDataFrame + from narwhals.typing import CompliantLazyFrame from narwhals.typing import SizeUnit from narwhals.typing import _1DArray from narwhals.typing import _2DArray @@ -69,11 +71,8 @@ ] PromoteOptions: TypeAlias = Literal["none", "default", "permissive"] -from narwhals.typing import CompliantDataFrame -from narwhals.typing import CompliantLazyFrame - -class ArrowDataFrame(EagerDataFrame["ArrowSeries", "ArrowExpr"], CompliantLazyFrame): +class ArrowDataFrame(EagerDataFrame["ArrowSeries", "ArrowExpr", "pa.Table"]): # --- not in the spec --- def __init__( self: Self, @@ -349,6 +348,8 @@ def estimated_size(self: Self, unit: SizeUnit) -> int | float: sz = self._native_frame.nbytes return scale_bytes(sz, unit) + explode = not_implemented() + @property def columns(self: Self) -> list[str]: return self._native_frame.schema.names @@ -573,7 +574,9 @@ def tail(self: Self, n: int) -> Self: else: return self._from_native_frame(df.slice(abs(n)), validate_column_names=False) - def lazy(self: Self, *, backend: Implementation | None = None) -> CompliantLazyFrame: + def lazy( + self: Self, *, backend: Implementation | None = None + ) -> CompliantLazyFrame[Any, Any]: from narwhals.utils import parse_version if backend is None: diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py index b64c138834..e23ddc0ab0 100644 --- a/narwhals/_compliant/dataframe.py +++ b/narwhals/_compliant/dataframe.py @@ -16,7 +16,11 @@ from narwhals._compliant.typing import CompliantSeriesT from narwhals._compliant.typing import EagerExprT_contra from narwhals._compliant.typing import EagerSeriesT +from narwhals._compliant.typing import NativeFrameT_co from narwhals._expression_parsing import evaluate_output_names_and_aliases +from narwhals.utils import Version +from narwhals.utils import _StoresNative +from narwhals.utils import deprecated if TYPE_CHECKING: from io import BytesIO @@ -70,6 +74,7 @@ def collect_schema(self) -> Mapping[str, DType]: ... def drop(self, columns: Sequence[str], *, strict: bool) -> Self: ... def drop_nulls(self, subset: Sequence[str] | None) -> Self: ... def estimated_size(self, unit: SizeUnit) -> int | float: ... + def explode(self: Self, columns: Sequence[str]) -> Self: ... def filter(self, predicate: CompliantExprT_contra | Incomplete) -> Self: ... def gather_every(self, n: int, offset: int) -> Self: ... def get_column(self, name: str) -> CompliantSeriesT: ... @@ -101,7 +106,7 @@ def join_asof( strategy: Literal["backward", "forward", "nearest"], suffix: str, ) -> Self: ... - def lazy(self, *, backend: Implementation | None) -> CompliantLazyFrame: ... + def lazy(self, *, backend: Implementation | None) -> CompliantLazyFrame[Any, Any]: ... def rename(self, mapping: Mapping[str, str]) -> Self: ... def row(self, index: int) -> tuple[Any, ...]: ... def rows( @@ -136,7 +141,7 @@ def unique( subset: Sequence[str] | None, *, keep: Literal["any", "first", "last", "none"], - maintain_order: bool | None, + maintain_order: bool | None = None, ) -> Self: ... def unpivot( self, @@ -155,26 +160,101 @@ def write_csv(self, file: str | Path | BytesIO | None) -> str | None: ... def write_parquet(self, file: str | Path | BytesIO) -> None: ... -class CompliantLazyFrame(Protocol): +class CompliantLazyFrame( + _StoresNative[NativeFrameT_co], Protocol[CompliantExprT_contra, NativeFrameT_co] +): + _native_frame: Any + _implementation: Implementation + _backend_version: tuple[int, ...] + _version: Version + def __narwhals_lazyframe__(self) -> Self: ... def __narwhals_namespace__(self) -> Any: ... - def simple_select( - self, *column_names: str - ) -> Self: ... # `select` where all args are column names. - def aggregate(self, *exprs: Any) -> Self: # pragma: no cover - ... # `select` where all args are aggregations or literals - # (so, no broadcasting is necessary). + + def simple_select(self, *column_names: str) -> Self: + """`select` where all args are column names.""" + ... + + def aggregate(self, *exprs: CompliantExprT_contra) -> Self: + """`select` where all args are aggregations or literals. + + (so, no broadcasting is necessary). + """ + ... + + def _change_version(self, version: Version) -> Self: ... + + @property + def native(self) -> NativeFrameT_co: + return self._native_frame # type: ignore[no-any-return] @property def columns(self) -> Sequence[str]: ... @property def schema(self) -> Mapping[str, DType]: ... def _iter_columns(self) -> Iterator[Any]: ... + def collect( + self, backend: Implementation | None, **kwargs: Any + ) -> CompliantDataFrame[Any, Any]: ... + def collect_schema(self) -> Mapping[str, DType]: ... + def drop(self, columns: Sequence[str], *, strict: bool) -> Self: ... + def drop_nulls(self, subset: Sequence[str] | None) -> Self: ... + def explode(self: Self, columns: Sequence[str]) -> Self: ... + def filter(self, predicate: CompliantExprT_contra | Incomplete) -> Self: ... + @deprecated( + "`LazyFrame.gather_every` is deprecated and will be removed in a future version." + ) + def gather_every(self, n: int, offset: int) -> Self: ... + def group_by(self, *keys: str, drop_null_keys: bool) -> Incomplete: ... + def head(self, n: int) -> Self: ... + def join( + self: Self, + other: Self, + *, + how: Literal["left", "inner", "cross", "anti", "semi"], + left_on: Sequence[str] | None, + right_on: Sequence[str] | None, + suffix: str, + ) -> Self: ... + def join_asof( + self: Self, + other: Self, + *, + left_on: str | None, + right_on: str | None, + by_left: Sequence[str] | None, + by_right: Sequence[str] | None, + strategy: Literal["backward", "forward", "nearest"], + suffix: str, + ) -> Self: ... + def rename(self, mapping: Mapping[str, str]) -> Self: ... + def select(self, *exprs: CompliantExprT_contra) -> Self: ... + def sort( + self, *by: str, descending: bool | Sequence[bool], nulls_last: bool + ) -> Self: ... + @deprecated("`LazyFrame.tail` is deprecated and will be removed in a future version.") + def tail(self, n: int) -> Self: ... + def unique( + self, + subset: Sequence[str] | None, + *, + keep: Literal["any", "none"], + ) -> Self: ... + def unpivot( + self, + on: Sequence[str] | None, + index: Sequence[str] | None, + variable_name: str, + value_name: str, + ) -> Self: ... + def with_columns(self, *exprs: CompliantExprT_contra) -> Self: ... + def with_row_index(self, name: str) -> Self: ... class EagerDataFrame( CompliantDataFrame[EagerSeriesT, EagerExprT_contra], - Protocol[EagerSeriesT, EagerExprT_contra], + CompliantLazyFrame[EagerExprT_contra, NativeFrameT_co], + Protocol[EagerSeriesT, EagerExprT_contra, NativeFrameT_co], ): def _evaluate_expr(self, expr: EagerExprT_contra, /) -> EagerSeriesT: """Evaluate `expr` and ensure it has a **single** output.""" diff --git a/narwhals/_compliant/selectors.py b/narwhals/_compliant/selectors.py index 38c02026a3..efcb5a4f33 100644 --- a/narwhals/_compliant/selectors.py +++ b/narwhals/_compliant/selectors.py @@ -65,9 +65,11 @@ SeriesOrExprT = TypeVar("SeriesOrExprT", bound="CompliantSeries | NativeExpr") SeriesT = TypeVar("SeriesT", bound="CompliantSeries") ExprT = TypeVar("ExprT", bound="NativeExpr") -FrameT = TypeVar("FrameT", bound="CompliantDataFrame[Any, Any] | CompliantLazyFrame") +FrameT = TypeVar( + "FrameT", bound="CompliantDataFrame[Any, Any] | CompliantLazyFrame[Any, Any]" +) DataFrameT = TypeVar("DataFrameT", bound="CompliantDataFrame[Any, Any]") -LazyFrameT = TypeVar("LazyFrameT", bound="CompliantLazyFrame") +LazyFrameT = TypeVar("LazyFrameT", bound="CompliantLazyFrame[Any, Any]") SelectorOrExpr: TypeAlias = ( "CompliantSelector[FrameT, SeriesOrExprT] | CompliantExpr[FrameT, SeriesOrExprT]" ) @@ -309,7 +311,7 @@ def __repr__(self: Self) -> str: # pragma: no cover def _eval_lhs_rhs( - df: CompliantDataFrame[Any, Any] | CompliantLazyFrame, + df: CompliantDataFrame[Any, Any] | CompliantLazyFrame[Any, Any], lhs: CompliantExpr[Any, Any], rhs: CompliantExpr[Any, Any], ) -> tuple[Sequence[str], Sequence[str]]: diff --git a/narwhals/_compliant/typing.py b/narwhals/_compliant/typing.py index 5e728e9656..d2efbaa489 100644 --- a/narwhals/_compliant/typing.py +++ b/narwhals/_compliant/typing.py @@ -18,6 +18,7 @@ from narwhals._compliant.namespace import EagerNamespace from narwhals._compliant.series import CompliantSeries from narwhals._compliant.series import EagerSeries + from narwhals.typing import NativeFrame __all__ = [ "AliasName", @@ -35,18 +36,20 @@ bound="CompliantSeries | NativeExpr", covariant=True, ) + +NativeFrameT_co = TypeVar("NativeFrameT_co", bound="NativeFrame", covariant=True) CompliantFrameT = TypeVar( - "CompliantFrameT", bound="CompliantDataFrame[Any, Any] | CompliantLazyFrame" + "CompliantFrameT", bound="CompliantDataFrame[Any, Any] | CompliantLazyFrame[Any, Any]" ) CompliantDataFrameT = TypeVar("CompliantDataFrameT", bound="CompliantDataFrame[Any, Any]") -CompliantLazyFrameT = TypeVar("CompliantLazyFrameT", bound="CompliantLazyFrame") +CompliantLazyFrameT = TypeVar("CompliantLazyFrameT", bound="CompliantLazyFrame[Any, Any]") IntoCompliantExpr: TypeAlias = "CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co] | CompliantSeriesOrNativeExprT_co" CompliantExprT = TypeVar("CompliantExprT", bound="CompliantExpr[Any, Any]") CompliantExprT_contra = TypeVar( "CompliantExprT_contra", bound="CompliantExpr[Any, Any]", contravariant=True ) -EagerDataFrameT = TypeVar("EagerDataFrameT", bound="EagerDataFrame[Any, Any]") +EagerDataFrameT = TypeVar("EagerDataFrameT", bound="EagerDataFrame[Any, Any, Any]") EagerSeriesT = TypeVar("EagerSeriesT", bound="EagerSeries[Any]") EagerSeriesT_co = TypeVar("EagerSeriesT_co", bound="EagerSeries[Any]", covariant=True) EagerExprT = TypeVar("EagerExprT", bound="EagerExpr[Any, Any]") @@ -54,7 +57,7 @@ "EagerExprT_contra", bound="EagerExpr[Any, Any]", contravariant=True ) EagerNamespaceAny: TypeAlias = ( - "EagerNamespace[EagerDataFrame[Any, Any], EagerSeries[Any], EagerExpr[Any, Any]]" + "EagerNamespace[EagerDataFrame[Any, Any, Any], EagerSeries[Any], EagerExpr[Any, Any]]" ) AliasNames: TypeAlias = Callable[[Sequence[str]], Sequence[str]] diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index 12f1706951..95ea9aa6cb 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -4,6 +4,7 @@ from typing import Any from typing import Iterator from typing import Literal +from typing import Mapping from typing import Sequence import dask.dataframe as dd @@ -18,6 +19,7 @@ from narwhals.utils import Implementation from narwhals.utils import check_column_exists from narwhals.utils import generate_temporary_column_name +from narwhals.utils import not_implemented from narwhals.utils import parse_columns_to_drop from narwhals.utils import parse_version from narwhals.utils import validate_backend_version @@ -35,7 +37,7 @@ from narwhals.utils import Version -class DaskLazyFrame(CompliantLazyFrame): +class DaskLazyFrame(CompliantLazyFrame["DaskExpr", "dd.DataFrame"]): def __init__( self: Self, native_dataframe: dd.DataFrame, @@ -168,7 +170,7 @@ def select(self: Self, *exprs: DaskExpr) -> Self: ) return self._from_native_frame(df) - def drop_nulls(self: Self, subset: list[str] | None) -> Self: + def drop_nulls(self: Self, subset: Sequence[str] | None) -> Self: if subset is None: return self._from_native_frame(self._native_frame.dropna()) plx = self.__narwhals_namespace__() @@ -189,7 +191,7 @@ def schema(self: Self) -> dict[str, DType]: def collect_schema(self: Self) -> dict[str, DType]: return self.schema - def drop(self: Self, columns: list[str], strict: bool) -> Self: # noqa: FBT001 + def drop(self: Self, columns: Sequence[str], *, strict: bool) -> Self: to_drop = parse_columns_to_drop( compliant_frame=self, columns=columns, strict=strict ) @@ -205,7 +207,7 @@ def with_row_index(self: Self, name: str) -> Self: ) ) - def rename(self: Self, mapping: dict[str, str]) -> Self: + def rename(self: Self, mapping: Mapping[str, str]) -> Self: return self._from_native_frame(self._native_frame.rename(columns=mapping)) def head(self: Self, n: int) -> Self: @@ -215,7 +217,7 @@ def head(self: Self, n: int) -> Self: def unique( self: Self, - subset: list[str] | None, + subset: Sequence[str] | None, *, keep: Literal["any", "none"], ) -> Self: @@ -254,8 +256,8 @@ def join( other: Self, *, how: Literal["left", "inner", "cross", "anti", "semi"], - left_on: list[str] | None, - right_on: list[str] | None, + left_on: Sequence[str] | None, + right_on: Sequence[str] | None, suffix: str, ) -> Self: if how == "cross": @@ -286,7 +288,7 @@ def join( other_native = ( select_columns_by_name( other._native_frame, - right_on, + list(right_on), self._backend_version, self._implementation, ) @@ -313,7 +315,7 @@ def join( other_native = ( select_columns_by_name( other._native_frame, - right_on, + list(right_on), self._backend_version, self._implementation, ) @@ -364,8 +366,8 @@ def join_asof( *, left_on: str | None, right_on: str | None, - by_left: list[str] | None, - by_right: list[str] | None, + by_left: Sequence[str] | None, + by_right: Sequence[str] | None, strategy: Literal["backward", "forward", "nearest"], suffix: str, ) -> Self: @@ -412,8 +414,8 @@ def gather_every(self: Self, n: int, offset: int) -> Self: def unpivot( self: Self, - on: list[str] | None, - index: list[str] | None, + on: Sequence[str] | None, + index: Sequence[str] | None, variable_name: str, value_name: str, ) -> Self: @@ -425,3 +427,5 @@ def unpivot( value_name=value_name, ) ) + + explode = not_implemented() diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py index 6f998a153b..34197c779b 100644 --- a/narwhals/_duckdb/dataframe.py +++ b/narwhals/_duckdb/dataframe.py @@ -4,6 +4,7 @@ from typing import Any from typing import Iterator from typing import Literal +from typing import Mapping from typing import Sequence import duckdb @@ -21,6 +22,7 @@ from narwhals.utils import Version from narwhals.utils import generate_temporary_column_name from narwhals.utils import import_dtypes_module +from narwhals.utils import not_implemented from narwhals.utils import parse_columns_to_drop from narwhals.utils import parse_version from narwhals.utils import validate_backend_version @@ -41,7 +43,7 @@ from narwhals.typing import CompliantLazyFrame -class DuckDBLazyFrame(CompliantLazyFrame): +class DuckDBLazyFrame(CompliantLazyFrame["DuckDBExpr", "duckdb.DuckDBPyRelation"]): _implementation = Implementation.DUCKDB def __init__( @@ -155,7 +157,7 @@ def select( self._native_frame.select(*(val.alias(col) for col, val in new_columns_map)), ) - def drop(self: Self, columns: list[str], strict: bool) -> Self: # noqa: FBT001 + def drop(self: Self, columns: Sequence[str], *, strict: bool) -> Self: columns_to_drop = parse_columns_to_drop( compliant_frame=self, columns=columns, strict=strict ) @@ -240,7 +242,7 @@ def group_by(self: Self, *keys: str, drop_null_keys: bool) -> DuckDBGroupBy: compliant_frame=self, keys=list(keys), drop_null_keys=drop_null_keys ) - def rename(self: Self, mapping: dict[str, str]) -> Self: + def rename(self: Self, mapping: Mapping[str, str]) -> Self: df = self._native_frame selection = [ f"{col} as {mapping[col]}" if col in mapping else col for col in df.columns @@ -252,8 +254,8 @@ def join( other: Self, *, how: Literal["left", "inner", "cross", "anti", "semi"], - left_on: list[str] | None, - right_on: list[str] | None, + left_on: Sequence[str] | None, + right_on: Sequence[str] | None, suffix: str, ) -> Self: original_alias = self._native_frame.alias @@ -299,8 +301,8 @@ def join_asof( *, left_on: str | None, right_on: str | None, - by_left: list[str] | None, - by_right: list[str] | None, + by_left: Sequence[str] | None, + by_right: Sequence[str] | None, strategy: Literal["backward", "forward", "nearest"], suffix: str, ) -> Self: @@ -347,7 +349,7 @@ def collect_schema(self: Self) -> dict[str, DType]: } def unique( - self: Self, subset: Sequence[str] | None, keep: Literal["any", "none"] + self: Self, subset: Sequence[str] | None, *, keep: Literal["any", "none"] ) -> Self: if subset is not None: rel = self._native_frame @@ -399,14 +401,14 @@ def sort( ) return self._from_native_frame(result) - def drop_nulls(self: Self, subset: list[str] | None) -> Self: + def drop_nulls(self: Self, subset: Sequence[str] | None) -> Self: rel = self._native_frame subset_ = subset if subset is not None else rel.columns keep_condition = " and ".join(f'"{col}" is not null' for col in subset_) query = f"select * from rel where {keep_condition}" # noqa: S608 return self._from_native_frame(duckdb.sql(query)) - def explode(self: Self, columns: list[str]) -> Self: + def explode(self: Self, columns: Sequence[str]) -> Self: dtypes = import_dtypes_module(self._version) schema = self.collect_schema() for col in columns: @@ -450,15 +452,13 @@ def explode(self: Self, columns: list[str]) -> Self: def unpivot( self: Self, - on: list[str] | None, - index: list[str] | None, + on: Sequence[str] | None, + index: Sequence[str] | None, variable_name: str, value_name: str, ) -> Self: - index_: list[str] = [] if index is None else index - on_: list[str] = ( - [c for c in self.columns if c not in index_] if on is None else on - ) + index_ = [] if index is None else index + on_ = [c for c in self.columns if c not in index_] if on is None else on if variable_name == "": msg = "`variable_name` cannot be empty string for duckdb backend." @@ -486,3 +486,11 @@ def unpivot( from unpivot_cte; """ # noqa: S608 return self._from_native_frame(duckdb.sql(query)) + + gather_every = not_implemented.deprecated( + "`LazyFrame.gather_every` is deprecated and will be removed in a future version." + ) + tail = not_implemented.deprecated( + "`LazyFrame.tail` is deprecated and will be removed in a future version." + ) + with_row_index = not_implemented() diff --git a/narwhals/_expression_parsing.py b/narwhals/_expression_parsing.py index 1325cddd8e..95f34c7033 100644 --- a/narwhals/_expression_parsing.py +++ b/narwhals/_expression_parsing.py @@ -109,7 +109,7 @@ def extract_compliant( def evaluate_output_names_and_aliases( expr: CompliantExpr[Any, Any], - df: CompliantDataFrame[Any, Any] | CompliantLazyFrame, + df: CompliantDataFrame[Any, Any] | CompliantLazyFrame[Any, Any], exclude: Sequence[str], ) -> tuple[Sequence[str], Sequence[str]]: output_names = expr._evaluate_output_names(df) diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index e5a80e2043..5a4043e4f3 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -26,8 +26,6 @@ from narwhals._pandas_like.utils import select_columns_by_name from narwhals.dependencies import is_numpy_array_1d from narwhals.exceptions import InvalidOperationError -from narwhals.typing import CompliantDataFrame -from narwhals.typing import CompliantLazyFrame from narwhals.utils import Implementation from narwhals.utils import check_column_exists from narwhals.utils import generate_temporary_column_name @@ -51,6 +49,8 @@ from narwhals._pandas_like.group_by import PandasLikeGroupBy from narwhals._pandas_like.namespace import PandasLikeNamespace from narwhals.dtypes import DType + from narwhals.typing import CompliantDataFrame + from narwhals.typing import CompliantLazyFrame from narwhals.typing import SizeUnit from narwhals.typing import _1DArray from narwhals.typing import _2DArray @@ -83,9 +83,7 @@ ) -class PandasLikeDataFrame( - EagerDataFrame["PandasLikeSeries", "PandasLikeExpr"], CompliantLazyFrame -): +class PandasLikeDataFrame(EagerDataFrame["PandasLikeSeries", "PandasLikeExpr", "Any"]): # --- not in the spec --- def __init__( self: Self, @@ -779,7 +777,9 @@ def unique( ) # --- lazy-only --- - def lazy(self: Self, *, backend: Implementation | None = None) -> CompliantLazyFrame: + def lazy( + self: Self, *, backend: Implementation | None = None + ) -> CompliantLazyFrame[Any, Any]: from narwhals.utils import parse_version pandas_df = self.to_pandas() @@ -1078,7 +1078,7 @@ def unpivot( ) ) - def explode(self: Self, columns: list[str]) -> Self: + def explode(self: Self, columns: Sequence[str]) -> Self: dtypes = import_dtypes_module(self._version) schema = self.collect_schema() diff --git a/narwhals/_polars/dataframe.py b/narwhals/_polars/dataframe.py index 439528bfac..53e8d46461 100644 --- a/narwhals/_polars/dataframe.py +++ b/narwhals/_polars/dataframe.py @@ -56,6 +56,7 @@ class PolarsDataFrame: collect: Method[CompliantDataFrame[Any, Any]] drop_nulls: Method[Self] estimated_size: Method[int | float] + explode: Method[Self] filter: Method[Self] gather_every: Method[Self] item: Method[Any] @@ -292,7 +293,9 @@ def schema(self: Self) -> dict[str, DType]: for name, dtype in schema.items() } - def lazy(self: Self, *, backend: Implementation | None = None) -> CompliantLazyFrame: + def lazy( + self: Self, *, backend: Implementation | None = None + ) -> CompliantLazyFrame[Any, Any]: from narwhals.utils import parse_version if backend is None or backend is Implementation.POLARS: @@ -422,6 +425,20 @@ def to_polars(self: Self) -> pl.DataFrame: class PolarsLazyFrame: + drop_nulls: Method[Self] + explode: Method[Self] + filter: Method[Self] + gather_every: Method[Self] + head: Method[Self] + join: Method[Self] + join_asof: Method[Self] + rename: Method[Self] + select: Method[Self] + sort: Method[Self] + tail: Method[Self] + unique: Method[Self] + with_columns: Method[Self] + def __init__( self: Self, df: pl.LazyFrame, @@ -460,7 +477,7 @@ def _from_native_frame(self: Self, df: pl.LazyFrame) -> Self: def _change_version(self: Self, version: Version) -> Self: return self.__class__( - self._native_frame, backend_version=self._backend_version, version=version + self.native, backend_version=self._backend_version, version=version ) def __getattr__(self: Self, attr: str) -> Any: @@ -468,7 +485,7 @@ def func(*args: Any, **kwargs: Any) -> Any: args, kwargs = extract_args_kwargs(args, kwargs) # type: ignore[assignment] try: return self._from_native_frame( - getattr(self._native_frame, attr)(*args, **kwargs) + getattr(self.native, attr)(*args, **kwargs) ) except pl.exceptions.ColumnNotFoundError as e: # pragma: no cover raise ColumnNotFoundError(str(e)) from e @@ -478,13 +495,17 @@ def func(*args: Any, **kwargs: Any) -> Any: def _iter_columns(self) -> Iterator[PolarsSeries]: # pragma: no cover yield from self.collect(self._implementation).iter_columns() + @property + def native(self) -> pl.LazyFrame: + return self._native_frame + @property def columns(self: Self) -> list[str]: - return self._native_frame.columns + return self.native.columns @property def schema(self: Self) -> dict[str, DType]: - schema = self._native_frame.schema + schema = self.native.schema return { name: native_to_narwhals_dtype(dtype, self._version, self._backend_version) for name, dtype in schema.items() @@ -496,11 +517,11 @@ def collect_schema(self: Self) -> dict[str, DType]: name: native_to_narwhals_dtype( dtype, self._version, self._backend_version ) - for name, dtype in self._native_frame.schema.items() + for name, dtype in self.native.schema.items() } else: try: - collected_schema = self._native_frame.collect_schema() + collected_schema = self.native.collect_schema() except Exception as e: # noqa: BLE001 raise catch_polars_exception(e, self._backend_version) from None return { @@ -516,7 +537,7 @@ def collect( **kwargs: Any, ) -> CompliantDataFrame[Any, Any]: try: - result = self._native_frame.collect(**kwargs) + result = self.native.collect(**kwargs) except Exception as e: # noqa: BLE001 raise catch_polars_exception(e, self._backend_version) from None @@ -564,24 +585,24 @@ def group_by(self: Self, *by: str, drop_null_keys: bool) -> PolarsLazyGroupBy: def with_row_index(self: Self, name: str) -> Self: if self._backend_version < (0, 20, 4): - return self._from_native_frame(self._native_frame.with_row_count(name)) - return self._from_native_frame(self._native_frame.with_row_index(name)) + return self._from_native_frame(self.native.with_row_count(name)) + return self._from_native_frame(self.native.with_row_index(name)) - def drop(self: Self, columns: list[str], strict: bool) -> Self: # noqa: FBT001 + def drop(self: Self, columns: Sequence[str], *, strict: bool) -> Self: if self._backend_version < (1, 0, 0): - return self._from_native_frame(self._native_frame.drop(columns)) - return self._from_native_frame(self._native_frame.drop(columns, strict=strict)) + return self._from_native_frame(self.native.drop(columns)) + return self._from_native_frame(self.native.drop(columns, strict=strict)) def unpivot( self: Self, - on: list[str] | None, - index: list[str] | None, + on: Sequence[str] | None, + index: Sequence[str] | None, variable_name: str, value_name: str, ) -> Self: if self._backend_version < (1, 0, 0): return self._from_native_frame( - self._native_frame.melt( + self.native.melt( id_vars=index, value_vars=on, variable_name=variable_name, @@ -589,13 +610,13 @@ def unpivot( ) ) return self._from_native_frame( - self._native_frame.unpivot( + self.native.unpivot( on=on, index=index, variable_name=variable_name, value_name=value_name ) ) def simple_select(self, *column_names: str) -> Self: - return self._from_native_frame(self._native_frame.select(*column_names)) + return self._from_native_frame(self.native.select(*column_names)) def aggregate(self: Self, *exprs: Any) -> Self: - return self.select(*exprs) # type: ignore[no-any-return] + return self.select(*exprs) diff --git a/narwhals/_spark_like/dataframe.py b/narwhals/_spark_like/dataframe.py index fa44d440c4..f01fb0f0a9 100644 --- a/narwhals/_spark_like/dataframe.py +++ b/narwhals/_spark_like/dataframe.py @@ -5,6 +5,7 @@ from typing import Any from typing import Iterator from typing import Literal +from typing import Mapping from typing import Sequence from narwhals._spark_like.utils import evaluate_exprs @@ -19,6 +20,7 @@ from narwhals.utils import check_column_exists from narwhals.utils import find_stacklevel from narwhals.utils import import_dtypes_module +from narwhals.utils import not_implemented from narwhals.utils import parse_columns_to_drop from narwhals.utils import parse_version from narwhals.utils import validate_backend_version @@ -45,7 +47,7 @@ """Marker for working code that fails type checking.""" -class SparkLikeLazyFrame(CompliantLazyFrame): +class SparkLikeLazyFrame(CompliantLazyFrame["SparkLikeExpr", "SQLFrameDataFrame"]): def __init__( self: Self, native_dataframe: SQLFrameDataFrame, @@ -259,7 +261,7 @@ def schema(self: Self) -> dict[str, DType]: def collect_schema(self: Self) -> dict[str, DType]: return self.schema - def drop(self: Self, columns: list[str], strict: bool) -> Self: # noqa: FBT001 + def drop(self: Self, columns: Sequence[str], *, strict: bool) -> Self: columns_to_drop = parse_columns_to_drop( compliant_frame=self, columns=columns, strict=strict ) @@ -298,10 +300,11 @@ def sort( sort_cols = [sort_f(col) for col, sort_f in zip(by, sort_funcs)] return self._from_native_frame(self._native_frame.sort(*sort_cols)) - def drop_nulls(self: Self, subset: list[str] | None) -> Self: + def drop_nulls(self: Self, subset: Sequence[str] | None) -> Self: + subset = list(subset) if subset else None return self._from_native_frame(self._native_frame.dropna(subset=subset)) - def rename(self: Self, mapping: dict[str, str]) -> Self: + def rename(self: Self, mapping: Mapping[str, str]) -> Self: rename_mapping = { colname: mapping.get(colname, colname) for colname in self.columns } @@ -313,7 +316,7 @@ def rename(self: Self, mapping: dict[str, str]) -> Self: def unique( self: Self, - subset: list[str] | None, + subset: Sequence[str] | None, *, keep: Literal["any", "none"], ) -> Self: @@ -321,14 +324,15 @@ def unique( msg = "`LazyFrame.unique` with PySpark backend only supports `keep='any'`." raise ValueError(msg) check_column_exists(self.columns, subset) + subset = list(subset) if subset else None return self._from_native_frame(self._native_frame.dropDuplicates(subset=subset)) def join( self: Self, other: Self, how: Literal["inner", "left", "cross", "semi", "anti"], - left_on: list[str] | None, - right_on: list[str] | None, + left_on: Sequence[str] | None, + right_on: Sequence[str] | None, suffix: str, ) -> Self: self_native = self._native_frame @@ -364,11 +368,12 @@ def join( if colname not in (right_on or []) ] ) + on = list(left_on) if left_on else None return self._from_native_frame( - self_native.join(other_native, on=left_on, how=how).select(col_order) + self_native.join(other_native, on=on, how=how).select(col_order) ) - def explode(self: Self, columns: list[str]) -> Self: + def explode(self: Self, columns: Sequence[str]) -> Self: dtypes = import_dtypes_module(self._version) schema = self.collect_schema() @@ -438,8 +443,8 @@ def null_condition(col_name: str) -> Column: def unpivot( self: Self, - on: list[str] | None, - index: list[str] | None, + on: Sequence[str] | None, + index: Sequence[str] | None, variable_name: str, value_name: str, ) -> Self: @@ -467,3 +472,12 @@ def unpivot( if index is None: unpivoted_native_frame = unpivoted_native_frame.drop(*ids) return self._from_native_frame(unpivoted_native_frame) + + gather_every = not_implemented.deprecated( + "`LazyFrame.gather_every` is deprecated and will be removed in a future version." + ) + join_asof = not_implemented() + tail = not_implemented.deprecated( + "`LazyFrame.tail` is deprecated and will be removed in a future version." + ) + with_row_index = not_implemented() diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 28b841159f..a43cc3319c 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -33,6 +33,7 @@ from narwhals.utils import find_stacklevel from narwhals.utils import flatten from narwhals.utils import generate_repr +from narwhals.utils import is_compliant_lazyframe from narwhals.utils import is_sequence_but_not_str from narwhals.utils import issue_deprecation_warning from narwhals.utils import parse_version @@ -2209,8 +2210,10 @@ def __init__( level: Literal["full", "lazy", "interchange"], ) -> None: self._level = level - if hasattr(df, "__narwhals_lazyframe__"): - self._compliant_frame: Any = df.__narwhals_lazyframe__() + if is_compliant_lazyframe(df): + # NOTE: Blocked by (#2239) + # self._compliant_frame: CompliantLazyFrame[Any, FrameT] = df.__narwhals_lazyframe__() # noqa: ERA001 + self._compliant_frame = df.__narwhals_lazyframe__() else: # pragma: no cover msg = f"Expected Polars LazyFrame or an object that implements `__narwhals_lazyframe__`, got: {type(df)}" raise AssertionError(msg) diff --git a/narwhals/utils.py b/narwhals/utils.py index 9572257813..97dd571904 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -49,6 +49,7 @@ from typing import AbstractSet as Set import pandas as pd + from typing_extensions import LiteralString from typing_extensions import ParamSpec from typing_extensions import Self from typing_extensions import TypeAlias @@ -57,6 +58,7 @@ from narwhals._compliant import CompliantExpr from narwhals._compliant import CompliantFrameT from narwhals._compliant import CompliantSeriesOrNativeExprT_co + from narwhals._compliant.typing import NativeFrameT_co from narwhals.dataframe import DataFrame from narwhals.dataframe import LazyFrame from narwhals.dtypes import DType @@ -1145,7 +1147,9 @@ def is_ordered_categorical(series: Series[Any]) -> bool: return False # pragma: no cover -def generate_unique_token(n_bytes: int, columns: list[str]) -> str: # pragma: no cover +def generate_unique_token( + n_bytes: int, columns: Sequence[str] +) -> str: # pragma: no cover msg = ( "Use `generate_temporary_column_name` instead. `generate_unique_token` is " "deprecated and it will be removed in future versions" @@ -1154,7 +1158,7 @@ def generate_unique_token(n_bytes: int, columns: list[str]) -> str: # pragma: n return generate_temporary_column_name(n_bytes=n_bytes, columns=columns) -def generate_temporary_column_name(n_bytes: int, columns: list[str]) -> str: +def generate_temporary_column_name(n_bytes: int, columns: Sequence[str]) -> str: """Generates a unique column name that is not present in the given list of columns. It relies on [python secrets token_hex](https://docs.python.org/3/library/secrets.html#secrets.token_hex) @@ -1477,7 +1481,9 @@ def is_compliant_dataframe( return _hasattr_static(obj, "__narwhals_dataframe__") -def is_compliant_lazyframe(obj: Any) -> TypeIs[CompliantLazyFrame]: +def is_compliant_lazyframe( + obj: CompliantLazyFrame[CompliantExprT_co, NativeFrameT_co] | Any, +) -> TypeIs[CompliantLazyFrame[CompliantExprT_co, NativeFrameT_co]]: return _hasattr_static(obj, "__narwhals_lazyframe__") @@ -1529,6 +1535,24 @@ def unstable(fn: _Fn, /) -> _Fn: return fn +if TYPE_CHECKING: + import sys + + if sys.version_info >= (3, 13): + # NOTE: avoids `mypy` + # error: Module "narwhals.utils" does not explicitly export attribute "deprecated" [attr-defined] + from warnings import deprecated as deprecated # noqa: PLC0414 + else: + from typing_extensions import deprecated as deprecated # noqa: PLC0414 +else: + + def deprecated(message: str, /) -> Callable[[_Fn], _Fn]: # noqa: ARG001 + def wrapper(func: _Fn, /) -> _Fn: + return func + + return wrapper + + class not_implemented: # noqa: N801 """Mark some functionality as unsupported. @@ -1597,6 +1621,21 @@ def __call__(self, *args: Any, **kwds: Any) -> Any: # Wouldn't be reachable through *regular* attribute access return self.__get__("raise") + @classmethod + def deprecated(cls, message: LiteralString, /) -> Self: + """Alt constructor, wraps with `@deprecated`. + + Arguments: + message: **Static-only** deprecation message, emitted in an IDE. + + Returns: + An exception-raising [descriptor]. + + [descriptor]: https://docs.python.org/3/howto/descriptor.html + """ + obj = cls() + return deprecated(message)(obj) + def _not_implemented_error(what: str, who: str, /) -> NotImplementedError: msg = ( @@ -1605,21 +1644,3 @@ def _not_implemented_error(what: str, who: str, /) -> NotImplementedError: "please open an issue at: https://github.com/narwhals-dev/narwhals/issues" ) return NotImplementedError(msg) - - -if TYPE_CHECKING: - import sys - - if sys.version_info >= (3, 13): - # NOTE: avoids `mypy` - # error: Module "narwhals.utils" does not explicitly export attribute "deprecated" [attr-defined] - from warnings import deprecated as deprecated # noqa: PLC0414 - else: - from typing_extensions import deprecated as deprecated # noqa: PLC0414 -else: - - def deprecated(message: str, /) -> Callable[[_Fn], _Fn]: # noqa: ARG001 - def wrapper(func: _Fn, /) -> _Fn: - return func - - return wrapper