Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
952f5aa
chore(typing): Mostly finish `CompliantLazyFrame`
dangotbanned Mar 17, 2025
af28859
fix: add missing `CompliantDataFrame.explode`
dangotbanned Mar 17, 2025
b15976e
feat(typing): Adds `not_implemented.deprecated`
dangotbanned Mar 17, 2025
3b72e9d
chore(typing): Finish `PolarsLazyFrame`
dangotbanned Mar 17, 2025
4441917
chore(typing): Finish `SparkLikeLazyFrame`
dangotbanned Mar 17, 2025
d34581b
chore(typing): Finish `DaskLazyFrame`
dangotbanned Mar 17, 2025
8ec6aba
chore(typing): Finish `DuckDBLazyFrame`
dangotbanned Mar 17, 2025
7e9e2bb
fix(typing): `PandasLikeDataFrame.explode`
dangotbanned Mar 17, 2025
40625d4
chore(typing): Mark `.lazy` return as `Incomplete`
dangotbanned Mar 17, 2025
169ddc7
Merge remote-tracking branch 'upstream/main' into compliant-lazyframe…
dangotbanned Mar 17, 2025
2f9d223
Merge remote-tracking branch 'upstream/main' into compliant-lazyframe…
dangotbanned Mar 17, 2025
4922bb9
Merge remote-tracking branch 'upstream/main' into compliant-lazyframe…
dangotbanned Mar 17, 2025
df00903
chore(typing): Fill `Incomplete` for `*Expr`
dangotbanned Mar 17, 2025
0757d3c
revert: remove `.to_(arrow|pandas)`
dangotbanned Mar 18, 2025
3b81619
fix(DRAFT): Expose `CompliantLazyFrame.native`
dangotbanned Mar 18, 2025
e70cc47
fix: remove default in `CompliantLazyFrame.lazy`
dangotbanned Mar 18, 2025
17ca5e8
fix(typing): Align `unique` signatures
dangotbanned Mar 18, 2025
62de258
fix: coverage for `PolarsLazyFrame.native`
dangotbanned Mar 18, 2025
ac25fde
fix(typing): Add missing `_change_version` method
dangotbanned Mar 18, 2025
4100cb7
Merge branch 'main' into compliant-lazyframe-spec
dangotbanned Mar 18, 2025
53c579e
chore(typing): Mark intended annotation, that isn't valid yet
dangotbanned Mar 18, 2025
9a523ff
revert: remove `CompliantLazyFrame.lazy`
dangotbanned Mar 18, 2025
3b6506b
Merge branch 'main' into compliant-lazyframe-spec
dangotbanned Mar 18, 2025
f906cce
lol `maintain_order` default one side only 😅
dangotbanned Mar 19, 2025
22f4467
revert: don't widen `keep` for lazy
dangotbanned Mar 19, 2025
8204537
Merge remote-tracking branch 'upstream/main' into compliant-lazyframe…
dangotbanned Mar 19, 2025
3f3e8a3
make `keep` a keyword again
dangotbanned Mar 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@
from narwhals._arrow.typing import Mask # type: ignore[attr-defined]
from narwhals._arrow.typing import Order # type: ignore[attr-defined]
from narwhals.dtypes import DType
from narwhals.typing import CompliantDataFrame
from narwhals.typing import CompliantLazyFrame
from narwhals.typing import SizeUnit
from narwhals.typing import _1DArray
from narwhals.typing import _2DArray
Expand All @@ -69,11 +71,8 @@
]
PromoteOptions: TypeAlias = Literal["none", "default", "permissive"]

from narwhals.typing import CompliantDataFrame
from narwhals.typing import CompliantLazyFrame


class ArrowDataFrame(EagerDataFrame["ArrowSeries", "ArrowExpr"], CompliantLazyFrame):
class ArrowDataFrame(EagerDataFrame["ArrowSeries", "ArrowExpr", "pa.Table"]):
# --- not in the spec ---
def __init__(
self: Self,
Expand Down Expand Up @@ -349,6 +348,8 @@ def estimated_size(self: Self, unit: SizeUnit) -> int | float:
sz = self._native_frame.nbytes
return scale_bytes(sz, unit)

explode = not_implemented()

@property
def columns(self: Self) -> list[str]:
return self._native_frame.schema.names
Expand Down Expand Up @@ -573,7 +574,9 @@ def tail(self: Self, n: int) -> Self:
else:
return self._from_native_frame(df.slice(abs(n)), validate_column_names=False)

def lazy(self: Self, *, backend: Implementation | None = None) -> CompliantLazyFrame:
def lazy(
self: Self, *, backend: Implementation | None = None
) -> CompliantLazyFrame[Any, Any]:
from narwhals.utils import parse_version

if backend is None:
Expand Down
100 changes: 90 additions & 10 deletions narwhals/_compliant/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@
from narwhals._compliant.typing import CompliantSeriesT
from narwhals._compliant.typing import EagerExprT_contra
from narwhals._compliant.typing import EagerSeriesT
from narwhals._compliant.typing import NativeFrameT_co
from narwhals._expression_parsing import evaluate_output_names_and_aliases
from narwhals.utils import Version
from narwhals.utils import _StoresNative
from narwhals.utils import deprecated

if TYPE_CHECKING:
from io import BytesIO
Expand Down Expand Up @@ -70,6 +74,7 @@ def collect_schema(self) -> Mapping[str, DType]: ...
def drop(self, columns: Sequence[str], *, strict: bool) -> Self: ...
def drop_nulls(self, subset: Sequence[str] | None) -> Self: ...
def estimated_size(self, unit: SizeUnit) -> int | float: ...
def explode(self: Self, columns: Sequence[str]) -> Self: ...
def filter(self, predicate: CompliantExprT_contra | Incomplete) -> Self: ...
def gather_every(self, n: int, offset: int) -> Self: ...
def get_column(self, name: str) -> CompliantSeriesT: ...
Expand Down Expand Up @@ -101,7 +106,7 @@ def join_asof(
strategy: Literal["backward", "forward", "nearest"],
suffix: str,
) -> Self: ...
def lazy(self, *, backend: Implementation | None) -> CompliantLazyFrame: ...
def lazy(self, *, backend: Implementation | None) -> CompliantLazyFrame[Any, Any]: ...
def rename(self, mapping: Mapping[str, str]) -> Self: ...
def row(self, index: int) -> tuple[Any, ...]: ...
def rows(
Expand Down Expand Up @@ -136,7 +141,7 @@ def unique(
subset: Sequence[str] | None,
*,
keep: Literal["any", "first", "last", "none"],
maintain_order: bool | None,
maintain_order: bool | None = None,
) -> Self: ...
def unpivot(
self,
Expand All @@ -155,26 +160,101 @@ def write_csv(self, file: str | Path | BytesIO | None) -> str | None: ...
def write_parquet(self, file: str | Path | BytesIO) -> None: ...


class CompliantLazyFrame(Protocol):
class CompliantLazyFrame(
_StoresNative[NativeFrameT_co], Protocol[CompliantExprT_contra, NativeFrameT_co]
):
_native_frame: Any
_implementation: Implementation
_backend_version: tuple[int, ...]
_version: Version

def __narwhals_lazyframe__(self) -> Self: ...
def __narwhals_namespace__(self) -> Any: ...
def simple_select(
self, *column_names: str
) -> Self: ... # `select` where all args are column names.
def aggregate(self, *exprs: Any) -> Self: # pragma: no cover
... # `select` where all args are aggregations or literals
# (so, no broadcasting is necessary).

def simple_select(self, *column_names: str) -> Self:
"""`select` where all args are column names."""
...

def aggregate(self, *exprs: CompliantExprT_contra) -> Self:
"""`select` where all args are aggregations or literals.

(so, no broadcasting is necessary).
"""
...

def _change_version(self, version: Version) -> Self: ...

@property
def native(self) -> NativeFrameT_co:
return self._native_frame # type: ignore[no-any-return]

@property
def columns(self) -> Sequence[str]: ...
@property
def schema(self) -> Mapping[str, DType]: ...
def _iter_columns(self) -> Iterator[Any]: ...
def collect(
self, backend: Implementation | None, **kwargs: Any
) -> CompliantDataFrame[Any, Any]: ...
def collect_schema(self) -> Mapping[str, DType]: ...
def drop(self, columns: Sequence[str], *, strict: bool) -> Self: ...
def drop_nulls(self, subset: Sequence[str] | None) -> Self: ...
def explode(self: Self, columns: Sequence[str]) -> Self: ...
def filter(self, predicate: CompliantExprT_contra | Incomplete) -> Self: ...
@deprecated(
"`LazyFrame.gather_every` is deprecated and will be removed in a future version."
)
def gather_every(self, n: int, offset: int) -> Self: ...
def group_by(self, *keys: str, drop_null_keys: bool) -> Incomplete: ...
def head(self, n: int) -> Self: ...
def join(
self: Self,
other: Self,
*,
how: Literal["left", "inner", "cross", "anti", "semi"],
left_on: Sequence[str] | None,
right_on: Sequence[str] | None,
suffix: str,
) -> Self: ...
def join_asof(
self: Self,
other: Self,
*,
left_on: str | None,
right_on: str | None,
by_left: Sequence[str] | None,
by_right: Sequence[str] | None,
strategy: Literal["backward", "forward", "nearest"],
suffix: str,
) -> Self: ...
def rename(self, mapping: Mapping[str, str]) -> Self: ...
def select(self, *exprs: CompliantExprT_contra) -> Self: ...
def sort(
self, *by: str, descending: bool | Sequence[bool], nulls_last: bool
) -> Self: ...
@deprecated("`LazyFrame.tail` is deprecated and will be removed in a future version.")
def tail(self, n: int) -> Self: ...
def unique(
self,
subset: Sequence[str] | None,
*,
keep: Literal["any", "none"],
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was overkill

The real problem to solve was duckdb using keep: str

def unique(self: Self, subset: Sequence[str] | None, keep: str) -> Self:

) -> Self: ...
def unpivot(
self,
on: Sequence[str] | None,
index: Sequence[str] | None,
variable_name: str,
value_name: str,
) -> Self: ...
def with_columns(self, *exprs: CompliantExprT_contra) -> Self: ...
def with_row_index(self, name: str) -> Self: ...


class EagerDataFrame(
CompliantDataFrame[EagerSeriesT, EagerExprT_contra],
Protocol[EagerSeriesT, EagerExprT_contra],
CompliantLazyFrame[EagerExprT_contra, NativeFrameT_co],
Protocol[EagerSeriesT, EagerExprT_contra, NativeFrameT_co],
):
def _evaluate_expr(self, expr: EagerExprT_contra, /) -> EagerSeriesT:
"""Evaluate `expr` and ensure it has a **single** output."""
Expand Down
8 changes: 5 additions & 3 deletions narwhals/_compliant/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,11 @@
SeriesOrExprT = TypeVar("SeriesOrExprT", bound="CompliantSeries | NativeExpr")
SeriesT = TypeVar("SeriesT", bound="CompliantSeries")
ExprT = TypeVar("ExprT", bound="NativeExpr")
FrameT = TypeVar("FrameT", bound="CompliantDataFrame[Any, Any] | CompliantLazyFrame")
FrameT = TypeVar(
"FrameT", bound="CompliantDataFrame[Any, Any] | CompliantLazyFrame[Any, Any]"
)
DataFrameT = TypeVar("DataFrameT", bound="CompliantDataFrame[Any, Any]")
LazyFrameT = TypeVar("LazyFrameT", bound="CompliantLazyFrame")
LazyFrameT = TypeVar("LazyFrameT", bound="CompliantLazyFrame[Any, Any]")
SelectorOrExpr: TypeAlias = (
"CompliantSelector[FrameT, SeriesOrExprT] | CompliantExpr[FrameT, SeriesOrExprT]"
)
Expand Down Expand Up @@ -309,7 +311,7 @@ def __repr__(self: Self) -> str: # pragma: no cover


def _eval_lhs_rhs(
df: CompliantDataFrame[Any, Any] | CompliantLazyFrame,
df: CompliantDataFrame[Any, Any] | CompliantLazyFrame[Any, Any],
lhs: CompliantExpr[Any, Any],
rhs: CompliantExpr[Any, Any],
) -> tuple[Sequence[str], Sequence[str]]:
Expand Down
11 changes: 7 additions & 4 deletions narwhals/_compliant/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from narwhals._compliant.namespace import EagerNamespace
from narwhals._compliant.series import CompliantSeries
from narwhals._compliant.series import EagerSeries
from narwhals.typing import NativeFrame

__all__ = [
"AliasName",
Expand All @@ -35,26 +36,28 @@
bound="CompliantSeries | NativeExpr",
covariant=True,
)

NativeFrameT_co = TypeVar("NativeFrameT_co", bound="NativeFrame", covariant=True)
CompliantFrameT = TypeVar(
"CompliantFrameT", bound="CompliantDataFrame[Any, Any] | CompliantLazyFrame"
"CompliantFrameT", bound="CompliantDataFrame[Any, Any] | CompliantLazyFrame[Any, Any]"
)
CompliantDataFrameT = TypeVar("CompliantDataFrameT", bound="CompliantDataFrame[Any, Any]")
CompliantLazyFrameT = TypeVar("CompliantLazyFrameT", bound="CompliantLazyFrame")
CompliantLazyFrameT = TypeVar("CompliantLazyFrameT", bound="CompliantLazyFrame[Any, Any]")
IntoCompliantExpr: TypeAlias = "CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co] | CompliantSeriesOrNativeExprT_co"
CompliantExprT = TypeVar("CompliantExprT", bound="CompliantExpr[Any, Any]")
CompliantExprT_contra = TypeVar(
"CompliantExprT_contra", bound="CompliantExpr[Any, Any]", contravariant=True
)

EagerDataFrameT = TypeVar("EagerDataFrameT", bound="EagerDataFrame[Any, Any]")
EagerDataFrameT = TypeVar("EagerDataFrameT", bound="EagerDataFrame[Any, Any, Any]")
EagerSeriesT = TypeVar("EagerSeriesT", bound="EagerSeries[Any]")
EagerSeriesT_co = TypeVar("EagerSeriesT_co", bound="EagerSeries[Any]", covariant=True)
EagerExprT = TypeVar("EagerExprT", bound="EagerExpr[Any, Any]")
EagerExprT_contra = TypeVar(
"EagerExprT_contra", bound="EagerExpr[Any, Any]", contravariant=True
)
EagerNamespaceAny: TypeAlias = (
"EagerNamespace[EagerDataFrame[Any, Any], EagerSeries[Any], EagerExpr[Any, Any]]"
"EagerNamespace[EagerDataFrame[Any, Any, Any], EagerSeries[Any], EagerExpr[Any, Any]]"
)

AliasNames: TypeAlias = Callable[[Sequence[str]], Sequence[str]]
Expand Down
30 changes: 17 additions & 13 deletions narwhals/_dask/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Any
from typing import Iterator
from typing import Literal
from typing import Mapping
from typing import Sequence

import dask.dataframe as dd
Expand All @@ -18,6 +19,7 @@
from narwhals.utils import Implementation
from narwhals.utils import check_column_exists
from narwhals.utils import generate_temporary_column_name
from narwhals.utils import not_implemented
from narwhals.utils import parse_columns_to_drop
from narwhals.utils import parse_version
from narwhals.utils import validate_backend_version
Expand All @@ -35,7 +37,7 @@
from narwhals.utils import Version


class DaskLazyFrame(CompliantLazyFrame):
class DaskLazyFrame(CompliantLazyFrame["DaskExpr", "dd.DataFrame"]):
def __init__(
self: Self,
native_dataframe: dd.DataFrame,
Expand Down Expand Up @@ -168,7 +170,7 @@ def select(self: Self, *exprs: DaskExpr) -> Self:
)
return self._from_native_frame(df)

def drop_nulls(self: Self, subset: list[str] | None) -> Self:
def drop_nulls(self: Self, subset: Sequence[str] | None) -> Self:
if subset is None:
return self._from_native_frame(self._native_frame.dropna())
plx = self.__narwhals_namespace__()
Expand All @@ -189,7 +191,7 @@ def schema(self: Self) -> dict[str, DType]:
def collect_schema(self: Self) -> dict[str, DType]:
return self.schema

def drop(self: Self, columns: list[str], strict: bool) -> Self: # noqa: FBT001
def drop(self: Self, columns: Sequence[str], *, strict: bool) -> Self:
to_drop = parse_columns_to_drop(
compliant_frame=self, columns=columns, strict=strict
)
Expand All @@ -205,7 +207,7 @@ def with_row_index(self: Self, name: str) -> Self:
)
)

def rename(self: Self, mapping: dict[str, str]) -> Self:
def rename(self: Self, mapping: Mapping[str, str]) -> Self:
return self._from_native_frame(self._native_frame.rename(columns=mapping))

def head(self: Self, n: int) -> Self:
Expand All @@ -215,7 +217,7 @@ def head(self: Self, n: int) -> Self:

def unique(
self: Self,
subset: list[str] | None,
subset: Sequence[str] | None,
*,
keep: Literal["any", "none"],
) -> Self:
Expand Down Expand Up @@ -254,8 +256,8 @@ def join(
other: Self,
*,
how: Literal["left", "inner", "cross", "anti", "semi"],
left_on: list[str] | None,
right_on: list[str] | None,
left_on: Sequence[str] | None,
right_on: Sequence[str] | None,
suffix: str,
) -> Self:
if how == "cross":
Expand Down Expand Up @@ -286,7 +288,7 @@ def join(
other_native = (
select_columns_by_name(
other._native_frame,
right_on,
list(right_on),
self._backend_version,
self._implementation,
)
Expand All @@ -313,7 +315,7 @@ def join(
other_native = (
select_columns_by_name(
other._native_frame,
right_on,
list(right_on),
self._backend_version,
self._implementation,
)
Expand Down Expand Up @@ -364,8 +366,8 @@ def join_asof(
*,
left_on: str | None,
right_on: str | None,
by_left: list[str] | None,
by_right: list[str] | None,
by_left: Sequence[str] | None,
by_right: Sequence[str] | None,
strategy: Literal["backward", "forward", "nearest"],
suffix: str,
) -> Self:
Expand Down Expand Up @@ -412,8 +414,8 @@ def gather_every(self: Self, n: int, offset: int) -> Self:

def unpivot(
self: Self,
on: list[str] | None,
index: list[str] | None,
on: Sequence[str] | None,
index: Sequence[str] | None,
variable_name: str,
value_name: str,
) -> Self:
Expand All @@ -425,3 +427,5 @@ def unpivot(
value_name=value_name,
)
)

explode = not_implemented()
Loading
Loading