From fadddc3da260c19eaa299593c63eab204315a2fc Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Fri, 28 Nov 2025 10:52:00 +0000 Subject: [PATCH 01/34] feat: add list aggregate methods --- docs/api-reference/expr_list.md | 5 + docs/api-reference/series_list.md | 5 + narwhals/_arrow/series_list.py | 17 ++- narwhals/_arrow/utils.py | 16 +++ narwhals/_compliant/any_namespace.py | 5 + narwhals/_compliant/expr.py | 15 +++ narwhals/_duckdb/expr_list.py | 15 +++ narwhals/_ibis/expr_list.py | 15 +++ narwhals/_pandas_like/series_list.py | 40 +++++++ narwhals/_spark_like/expr_list.py | 19 ++++ narwhals/expr_list.py | 125 ++++++++++++++++++++++ narwhals/series_list.py | 100 +++++++++++++++++ tests/expr_and_series/list/max_test.py | 40 +++++++ tests/expr_and_series/list/mean_test.py | 39 +++++++ tests/expr_and_series/list/median_test.py | 40 +++++++ tests/expr_and_series/list/min_test.py | 37 +++++++ tests/expr_and_series/list/sum_test.py | 40 +++++++ 17 files changed, 572 insertions(+), 1 deletion(-) create mode 100644 tests/expr_and_series/list/max_test.py create mode 100644 tests/expr_and_series/list/mean_test.py create mode 100644 tests/expr_and_series/list/median_test.py create mode 100644 tests/expr_and_series/list/min_test.py create mode 100644 tests/expr_and_series/list/sum_test.py diff --git a/docs/api-reference/expr_list.md b/docs/api-reference/expr_list.md index 84fb831c50..f44a25d751 100644 --- a/docs/api-reference/expr_list.md +++ b/docs/api-reference/expr_list.md @@ -7,6 +7,11 @@ - contains - get - len + - max + - mean + - median + - min + - sum - unique show_source: false show_bases: false diff --git a/docs/api-reference/series_list.md b/docs/api-reference/series_list.md index 7590732dee..39adbad185 100644 --- a/docs/api-reference/series_list.md +++ b/docs/api-reference/series_list.md @@ -7,6 +7,11 @@ - contains - get - len + - max + - mean + - median + - min + - sum - unique show_source: false show_bases: false diff --git 
a/narwhals/_arrow/series_list.py b/narwhals/_arrow/series_list.py index defad3dad6..25e598aedd 100644 --- a/narwhals/_arrow/series_list.py +++ b/narwhals/_arrow/series_list.py @@ -5,7 +5,7 @@ import pyarrow as pa import pyarrow.compute as pc -from narwhals._arrow.utils import ArrowSeriesNamespace +from narwhals._arrow.utils import ArrowSeriesNamespace, list_agg from narwhals._compliant.any_namespace import ListNamespace from narwhals._utils import not_implemented @@ -20,5 +20,20 @@ def len(self) -> ArrowSeries: def get(self, index: int) -> ArrowSeries: return self.with_native(pc.list_element(self.native, index)) + def min(self) -> ArrowSeries: + return self.with_native(list_agg(self.native, "min")) + + def max(self) -> ArrowSeries: + return self.with_native(list_agg(self.native, "max")) + + def mean(self) -> ArrowSeries: + return self.with_native(list_agg(self.native, "mean")) + + def median(self) -> ArrowSeries: + return self.with_native(list_agg(self.native, "approximate_median")) + + def sum(self) -> ArrowSeries: + return self.with_native(list_agg(self.native, "sum")) + unique = not_implemented() contains = not_implemented() diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index 46b5985e1d..6ca76eb239 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -11,6 +11,7 @@ if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Mapping + from typing import Literal from typing_extensions import TypeAlias, TypeIs @@ -494,3 +495,18 @@ def arange(start: int, end: int, step: int) -> ArrayAny: return pa.array(np.arange(start, end, step)) # NOTE: Added in https://github.com/apache/arrow/pull/46778 return pa.arange(start, end, step) # type: ignore[attr-defined] + + +def list_agg( + array: ChunkedArrayAny, + func: Literal["min", "max", "mean", "approximate_median", "sum"], +) -> ChunkedArrayAny: + return ( + pa.Table.from_arrays( + [pc.list_flatten(array), pc.list_parent_indices(array)], + names=["values", "offsets"], + ) + 
.group_by("offsets") + .aggregate([("values", func)]) + .column(f"values_{func}") + ) diff --git a/narwhals/_compliant/any_namespace.py b/narwhals/_compliant/any_namespace.py index b7e48a273f..cd76470d89 100644 --- a/narwhals/_compliant/any_namespace.py +++ b/narwhals/_compliant/any_namespace.py @@ -70,6 +70,11 @@ def get(self, index: int) -> CompliantT_co: ... def len(self) -> CompliantT_co: ... def unique(self) -> CompliantT_co: ... def contains(self, item: NonNestedLiteral) -> CompliantT_co: ... + def min(self) -> CompliantT_co: ... + def max(self) -> CompliantT_co: ... + def mean(self) -> CompliantT_co: ... + def median(self) -> CompliantT_co: ... + def sum(self) -> CompliantT_co: ... class NameNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]): diff --git a/narwhals/_compliant/expr.py b/narwhals/_compliant/expr.py index cc0d6cc3f8..6ff9417960 100644 --- a/narwhals/_compliant/expr.py +++ b/narwhals/_compliant/expr.py @@ -993,6 +993,21 @@ def contains(self, item: NonNestedLiteral) -> EagerExprT: def get(self, index: int) -> EagerExprT: return self.compliant._reuse_series_namespace("list", "get", index=index) + def min(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("list", "min") + + def max(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("list", "max") + + def mean(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("list", "mean") + + def median(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("list", "median") + + def sum(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("list", "sum") + class CompliantExprNameNamespace( # type: ignore[misc] _ExprNamespace[CompliantExprT_co], diff --git a/narwhals/_duckdb/expr_list.py b/narwhals/_duckdb/expr_list.py index b726f2fc78..184f57252f 100644 --- a/narwhals/_duckdb/expr_list.py +++ b/narwhals/_duckdb/expr_list.py @@ -40,3 +40,18 @@ def get(self, index: int) -> DuckDBExpr: return 
self.compliant._with_elementwise( lambda expr: F("list_extract", expr, lit(index + 1)) ) + + def min(self) -> DuckDBExpr: + return self.compliant._with_elementwise(lambda expr: F("list_min", expr)) + + def max(self) -> DuckDBExpr: + return self.compliant._with_elementwise(lambda expr: F("list_max", expr)) + + def mean(self) -> DuckDBExpr: + return self.compliant._with_elementwise(lambda expr: F("list_avg", expr)) + + def median(self) -> DuckDBExpr: + return self.compliant._with_elementwise(lambda expr: F("list_median", expr)) + + def sum(self) -> DuckDBExpr: + return self.compliant._with_elementwise(lambda expr: F("list_sum", expr)) diff --git a/narwhals/_ibis/expr_list.py b/narwhals/_ibis/expr_list.py index 8070769308..ff0cd76c08 100644 --- a/narwhals/_ibis/expr_list.py +++ b/narwhals/_ibis/expr_list.py @@ -4,6 +4,7 @@ from narwhals._compliant import LazyExprNamespace from narwhals._compliant.any_namespace import ListNamespace +from narwhals._utils import not_implemented if TYPE_CHECKING: import ibis.expr.types as ir @@ -27,3 +28,17 @@ def _get(expr: ir.ArrayColumn) -> ir.Column: return expr[index] return self.compliant._with_callable(_get) + + def min(self) -> IbisExpr: + return self.compliant._with_callable(lambda expr: expr.mins()) + + def max(self) -> IbisExpr: + return self.compliant._with_callable(lambda expr: expr.maxs()) + + def mean(self) -> IbisExpr: + return self.compliant._with_callable(lambda expr: expr.means()) + + def sum(self) -> IbisExpr: + return self.compliant._with_callable(lambda expr: expr.sums()) + + median = not_implemented() diff --git a/narwhals/_pandas_like/series_list.py b/narwhals/_pandas_like/series_list.py index 2d087493df..04f424e14d 100644 --- a/narwhals/_pandas_like/series_list.py +++ b/narwhals/_pandas_like/series_list.py @@ -11,6 +11,8 @@ from narwhals._utils import not_implemented if TYPE_CHECKING: + from typing import Literal + from narwhals._pandas_like.series import PandasLikeSeries @@ -40,3 +42,41 @@ def get(self, index: 
int) -> PandasLikeSeries: result = self.native.list[index] result.name = self.native.name return self.with_native(result) + + def _agg( + self, func: Literal["min", "max", "mean", "approximate_median", "sum"] + ) -> PandasLikeSeries: + dtype_backend = get_dtype_backend( + self.native.dtype, self.compliant._implementation + ) + if dtype_backend != "pyarrow": + msg = "Only pyarrow backend is currently supported." + raise NotImplementedError(msg) + + from narwhals._arrow.utils import list_agg, native_to_narwhals_dtype + + ca = self.native.array._pa_array + result_arr = list_agg(ca, func) + nw_dtype = native_to_narwhals_dtype(result_arr.type, self.version) + out_dtype = narwhals_to_native_dtype( + nw_dtype, "pyarrow", self.implementation, self.version + ) + result_native = type(self.native)( + result_arr, dtype=out_dtype, index=self.native.index, name=self.native.name + ) + return self.with_native(result_native) + + def min(self) -> PandasLikeSeries: + return self._agg("min") + + def max(self) -> PandasLikeSeries: + return self._agg("max") + + def mean(self) -> PandasLikeSeries: + return self._agg("mean") + + def median(self) -> PandasLikeSeries: + return self._agg("approximate_median") + + def sum(self) -> PandasLikeSeries: + return self._agg("sum") diff --git a/narwhals/_spark_like/expr_list.py b/narwhals/_spark_like/expr_list.py index 31be5f5bb9..f8ee83dfe5 100644 --- a/narwhals/_spark_like/expr_list.py +++ b/narwhals/_spark_like/expr_list.py @@ -4,6 +4,7 @@ from narwhals._compliant import LazyExprNamespace from narwhals._compliant.any_namespace import ListNamespace +from narwhals._utils import not_implemented if TYPE_CHECKING: from sqlframe.base.column import Column @@ -33,3 +34,21 @@ def _get(expr: Column) -> Column: return expr.getItem(index) return self.compliant._with_elementwise(_get) + + def min(self) -> SparkLikeExpr: + def func(expr: Column) -> Column: + F = self.compliant._F + return F.array_min(expr) + + return self.compliant._with_elementwise(func) + + 
def max(self) -> SparkLikeExpr: + def func(expr: Column) -> Column: + F = self.compliant._F + return F.array_max(F.array_compact(expr)) + + return self.compliant._with_elementwise(func) + + mean = not_implemented() + median = not_implemented() + sum = not_implemented() diff --git a/narwhals/expr_list.py b/narwhals/expr_list.py index 8f9c94c6ab..94e4c4f38d 100644 --- a/narwhals/expr_list.py +++ b/narwhals/expr_list.py @@ -143,3 +143,128 @@ def get(self, index: int) -> ExprT: return self._expr._append_node( ExprNode(ExprKind.ELEMENTWISE, "list.get", index=index) ) + + def min(self) -> ExprT: + """Compute the min value of the lists in the array. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [[1], [3, 4, None]]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(a_min=nw.col("a").list.min()) + ┌────────────────────────┐ + | Narwhals DataFrame | + |------------------------| + |shape: (2, 2) | + |┌──────────────┬───────┐| + |│ a ┆ a_min │| + |│ --- ┆ --- │| + |│ list[i64] ┆ i64 │| + |╞══════════════╪═══════╡| + |│ [1] ┆ 1 │| + |│ [3, 4, null] ┆ 3 │| + |└──────────────┴───────┘| + └────────────────────────┘ + """ + return self._expr._append_node(ExprNode(ExprKind.ELEMENTWISE, "list.min")) + + def max(self) -> ExprT: + """Compute the max value of the lists in the array. 
+ + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [[1], [3, 4, None]]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(a_max=nw.col("a").list.max()) + ┌────────────────────────┐ + | Narwhals DataFrame | + |------------------------| + |shape: (2, 2) | + |┌──────────────┬───────┐| + |│ a ┆ a_max │| + |│ --- ┆ --- │| + |│ list[i64] ┆ i64 │| + |╞══════════════╪═══════╡| + |│ [1] ┆ 1 │| + |│ [3, 4, null] ┆ 4 │| + |└──────────────┴───────┘| + └────────────────────────┘ + """ + return self._expr._append_node(ExprNode(ExprKind.ELEMENTWISE, "list.max")) + + def mean(self) -> ExprT: + """Compute the mean value of the lists in the array. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [[1], [3, 4, None]]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(a_mean=nw.col("a").list.mean()) + ┌─────────────────────────┐ + | Narwhals DataFrame | + |-------------------------| + |shape: (2, 2) | + |┌──────────────┬────────┐| + |│ a ┆ a_mean │| + |│ --- ┆ --- │| + |│ list[i64] ┆ f64 │| + |╞══════════════╪════════╡| + |│ [1] ┆ 1.0 │| + |│ [3, 4, null] ┆ 3.5 │| + |└──────────────┴────────┘| + └─────────────────────────┘ + """ + return self._expr._append_node(ExprNode(ExprKind.ELEMENTWISE, "list.mean")) + + def median(self) -> ExprT: + """Compute the median value of the lists in the array. 
+ + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [[1], [3, 4, None]]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(a_median=nw.col("a").list.median()) + ┌───────────────────────────┐ + | Narwhals DataFrame | + |---------------------------| + |shape: (2, 2) | + |┌──────────────┬──────────┐| + |│ a ┆ a_median │| + |│ --- ┆ --- │| + |│ list[i64] ┆ f64 │| + |╞══════════════╪══════════╡| + |│ [1] ┆ 1.0 │| + |│ [3, 4, null] ┆ 3.5 │| + |└──────────────┴──────────┘| + └───────────────────────────┘ + """ + return self._expr._append_node(ExprNode(ExprKind.ELEMENTWISE, "list.median")) + + def sum(self) -> ExprT: + """Compute the sum value of the lists in the array. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [[1], [3, 4, None]]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(a_sum=nw.col("a").list.sum()) + ┌────────────────────────┐ + | Narwhals DataFrame | + |------------------------| + |shape: (2, 2) | + |┌──────────────┬───────┐| + |│ a ┆ a_sum │| + |│ --- ┆ --- │| + |│ list[i64] ┆ i64 │| + |╞══════════════╪═══════╡| + |│ [1] ┆ 1 │| + |│ [3, 4, null] ┆ 7 │| + |└──────────────┴───────┘| + └────────────────────────┘ + """ + return self._expr._append_node(ExprNode(ExprKind.ELEMENTWISE, "list.sum")) diff --git a/narwhals/series_list.py b/narwhals/series_list.py index baa7ed8c8e..a771ff1c59 100644 --- a/narwhals/series_list.py +++ b/narwhals/series_list.py @@ -117,3 +117,103 @@ def get(self, index: int) -> SeriesT: return self._narwhals_series._with_compliant( self._narwhals_series._compliant_series.list.get(index) ) + + def min(self) -> SeriesT: + """Compute the min value of the lists in the array. 
+ + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> s_native = pl.Series([[1], [3, 4, None]]) + >>> s = nw.from_native(s_native, series_only=True) + >>> s.list.min().to_native() # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i64] + [ + 1 + 3 + ] + """ + return self._narwhals_series._with_compliant( + self._narwhals_series._compliant_series.list.min() + ) + + def max(self) -> SeriesT: + """Compute the max value of the lists in the array. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> s_native = pl.Series([[1], [3, 4, None]]) + >>> s = nw.from_native(s_native, series_only=True) + >>> s.list.max().to_native() # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i64] + [ + 1 + 4 + ] + """ + return self._narwhals_series._with_compliant( + self._narwhals_series._compliant_series.list.max() + ) + + def mean(self) -> SeriesT: + """Compute the mean value of the lists in the array. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> s_native = pl.Series([[1], [3, 4, None]]) + >>> s = nw.from_native(s_native, series_only=True) + >>> s.list.mean().to_native() # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [f64] + [ + 1.0 + 3.5 + ] + """ + return self._narwhals_series._with_compliant( + self._narwhals_series._compliant_series.list.mean() + ) + + def median(self) -> SeriesT: + """Compute the median value of the lists in the array. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> s_native = pl.Series([[1], [3, 4, None]]) + >>> s = nw.from_native(s_native, series_only=True) + >>> s.list.median().to_native() # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [f64] + [ + 1.0 + 3.5 + ] + """ + return self._narwhals_series._with_compliant( + self._narwhals_series._compliant_series.list.median() + ) + + def sum(self) -> SeriesT: + """Compute the sum value of the lists in the array. 
+ + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> s_native = pl.Series([[1], [3, 4, None]]) + >>> s = nw.from_native(s_native, series_only=True) + >>> s.list.sum().to_native() # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i64] + [ + 1 + 7 + ] + """ + return self._narwhals_series._with_compliant( + self._narwhals_series._compliant_series.list.sum() + ) diff --git a/tests/expr_and_series/list/max_test.py b/tests/expr_and_series/list/max_test.py new file mode 100644 index 0000000000..152fa77719 --- /dev/null +++ b/tests/expr_and_series/list/max_test.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +import narwhals as nw + +if TYPE_CHECKING: + from tests.utils import Constructor, ConstructorEager + +data = {"a": [[3, 2, 2, 4, None], [-1]]} + + +def test_max_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: + if any( + backend in str(constructor) for backend in ("dask", "modin", "cudf", "sqlframe") + ): + # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 + request.applymarker(pytest.mark.xfail) + result = ( + nw.from_native(constructor(data)) + .select(nw.col("a").cast(nw.List(nw.Int32())).list.max()) + .lazy() + .collect()["a"] + .to_list() + ) + assert result[0] == 4 + assert result[1] == -1 + + +def test_max_series( + request: pytest.FixtureRequest, constructor_eager: ConstructorEager +) -> None: + if any(backend in str(constructor_eager) for backend in ("modin", "cudf")): + request.applymarker(pytest.mark.xfail) + df = nw.from_native(constructor_eager(data), eager_only=True) + result = df["a"].cast(nw.List(nw.Int32())).list.max().to_list() + assert result[0] == 4 + assert result[1] == -1 diff --git a/tests/expr_and_series/list/mean_test.py b/tests/expr_and_series/list/mean_test.py new file mode 100644 index 0000000000..aa657a3574 --- /dev/null +++ b/tests/expr_and_series/list/mean_test.py @@ -0,0 +1,39 @@ +from __future__ import 
annotations + +from typing import TYPE_CHECKING + +import pytest + +import narwhals as nw + +if TYPE_CHECKING: + from tests.utils import Constructor, ConstructorEager + +data = {"a": [[3, 2, 2, 4, None], [-1]]} + + +def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: + if any( + backend in str(constructor) for backend in ("dask", "modin", "cudf", "sqlframe") + ): + request.applymarker(pytest.mark.xfail) + result = ( + nw.from_native(constructor(data)) + .select(nw.col("a").cast(nw.List(nw.Int32())).list.mean()) + .lazy() + .collect()["a"] + .to_list() + ) + assert result[0] == 2.75 + assert result[1] == -1 + + +def test_mean_series( + request: pytest.FixtureRequest, constructor_eager: ConstructorEager +) -> None: + if any(backend in str(constructor_eager) for backend in ("modin", "cudf")): + request.applymarker(pytest.mark.xfail) + df = nw.from_native(constructor_eager(data), eager_only=True) + result = df["a"].cast(nw.List(nw.Int32())).list.mean().to_list() + assert result[0] == 2.75 + assert result[1] == -1 diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py new file mode 100644 index 0000000000..cc253b53f1 --- /dev/null +++ b/tests/expr_and_series/list/median_test.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +import narwhals as nw + +if TYPE_CHECKING: + from tests.utils import Constructor, ConstructorEager + +data = {"a": [[3, 2, 2, 4, None], [-1]]} + + +def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: + if any( + backend in str(constructor) + for backend in ("dask", "modin", "cudf", "sqlframe", "ibis") + ): + request.applymarker(pytest.mark.xfail) + result = ( + nw.from_native(constructor(data)) + .select(nw.col("a").cast(nw.List(nw.Int32())).list.median()) + .lazy() + .collect()["a"] + .to_list() + ) + assert result[0] == 2.5 + assert result[1] == -1 + + +def 
test_median_series( + request: pytest.FixtureRequest, constructor_eager: ConstructorEager +) -> None: + if any(backend in str(constructor_eager) for backend in ("modin", "cudf")): + request.applymarker(pytest.mark.xfail) + df = nw.from_native(constructor_eager(data), eager_only=True) + result = df["a"].cast(nw.List(nw.Int32())).list.median().to_list() + assert result[0] == 2.5 + assert result[1] == -1 diff --git a/tests/expr_and_series/list/min_test.py b/tests/expr_and_series/list/min_test.py new file mode 100644 index 0000000000..fc0df66805 --- /dev/null +++ b/tests/expr_and_series/list/min_test.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +import narwhals as nw + +if TYPE_CHECKING: + from tests.utils import Constructor, ConstructorEager + +data = {"a": [[3, 2, 2, 4, None], [-1]]} + + +def test_min_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: + if any(backend in str(constructor) for backend in ("dask", "modin", "cudf")): + request.applymarker(pytest.mark.xfail) + result = ( + nw.from_native(constructor(data)) + .select(nw.col("a").cast(nw.List(nw.Int32())).list.min()) + .lazy() + .collect()["a"] + .to_list() + ) + assert result[0] == 2 + assert result[1] == -1 + + +def test_min_series( + request: pytest.FixtureRequest, constructor_eager: ConstructorEager +) -> None: + if any(backend in str(constructor_eager) for backend in ("modin", "cudf")): + request.applymarker(pytest.mark.xfail) + df = nw.from_native(constructor_eager(data), eager_only=True) + result = df["a"].cast(nw.List(nw.Int32())).list.min().to_list() + assert result[0] == 2 + assert result[1] == -1 diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py new file mode 100644 index 0000000000..d1f82ce622 --- /dev/null +++ b/tests/expr_and_series/list/sum_test.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +import 
narwhals as nw + +if TYPE_CHECKING: + from tests.utils import Constructor, ConstructorEager + +data = {"a": [[3, 2, 2, 4, None], [-1]]} + + +def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: + if any( + backend in str(constructor) for backend in ("dask", "modin", "cudf", "sqlframe") + ): + # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 + request.applymarker(pytest.mark.xfail) + result = ( + nw.from_native(constructor(data)) + .select(nw.col("a").cast(nw.List(nw.Int32())).list.sum()) + .lazy() + .collect()["a"] + .to_list() + ) + assert result[0] == 11 + assert result[1] == -1 + + +def test_sum_series( + request: pytest.FixtureRequest, constructor_eager: ConstructorEager +) -> None: + if any(backend in str(constructor_eager) for backend in ("modin", "cudf")): + request.applymarker(pytest.mark.xfail) + df = nw.from_native(constructor_eager(data), eager_only=True) + result = df["a"].cast(nw.List(nw.Int32())).list.sum().to_list() + assert result[0] == 11 + assert result[1] == -1 From 040527b45b98b2e5918f622ac4dd2d0c34a544f0 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Fri, 28 Nov 2025 11:36:23 +0000 Subject: [PATCH 02/34] xfail old pandas and skip if no pyarrow --- tests/expr_and_series/list/max_test.py | 9 +++++++++ tests/expr_and_series/list/mean_test.py | 9 +++++++++ tests/expr_and_series/list/median_test.py | 9 +++++++++ tests/expr_and_series/list/min_test.py | 9 +++++++++ tests/expr_and_series/list/sum_test.py | 9 +++++++++ 5 files changed, 45 insertions(+) diff --git a/tests/expr_and_series/list/max_test.py b/tests/expr_and_series/list/max_test.py index 152fa77719..7b9fe5ac4a 100644 --- a/tests/expr_and_series/list/max_test.py +++ b/tests/expr_and_series/list/max_test.py @@ -5,6 +5,7 @@ import pytest import narwhals as nw +from tests.utils import PANDAS_VERSION if TYPE_CHECKING: from tests.utils import Constructor, ConstructorEager @@ -18,6 +19,10 @@ def 
test_max_expr(request: pytest.FixtureRequest, constructor: Constructor) -> N ): # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 request.applymarker(pytest.mark.xfail) + if "pandas" in str(constructor): + if PANDAS_VERSION < (2, 2): + pytest.skip() + pytest.importorskip("pyarrow") result = ( nw.from_native(constructor(data)) .select(nw.col("a").cast(nw.List(nw.Int32())).list.max()) @@ -34,6 +39,10 @@ def test_max_series( ) -> None: if any(backend in str(constructor_eager) for backend in ("modin", "cudf")): request.applymarker(pytest.mark.xfail) + if "pandas" in str(constructor_eager): + if PANDAS_VERSION < (2, 2): + pytest.skip() + pytest.importorskip("pyarrow") df = nw.from_native(constructor_eager(data), eager_only=True) result = df["a"].cast(nw.List(nw.Int32())).list.max().to_list() assert result[0] == 4 diff --git a/tests/expr_and_series/list/mean_test.py b/tests/expr_and_series/list/mean_test.py index aa657a3574..4f05090b81 100644 --- a/tests/expr_and_series/list/mean_test.py +++ b/tests/expr_and_series/list/mean_test.py @@ -5,6 +5,7 @@ import pytest import narwhals as nw +from tests.utils import PANDAS_VERSION if TYPE_CHECKING: from tests.utils import Constructor, ConstructorEager @@ -17,6 +18,10 @@ def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> backend in str(constructor) for backend in ("dask", "modin", "cudf", "sqlframe") ): request.applymarker(pytest.mark.xfail) + if "pandas" in str(constructor): + if PANDAS_VERSION < (2, 2): + pytest.skip() + pytest.importorskip("pyarrow") result = ( nw.from_native(constructor(data)) .select(nw.col("a").cast(nw.List(nw.Int32())).list.mean()) @@ -33,6 +38,10 @@ def test_mean_series( ) -> None: if any(backend in str(constructor_eager) for backend in ("modin", "cudf")): request.applymarker(pytest.mark.xfail) + if "pandas" in str(constructor_eager): + if PANDAS_VERSION < (2, 2): + pytest.skip() + pytest.importorskip("pyarrow") df = nw.from_native(constructor_eager(data), 
eager_only=True) result = df["a"].cast(nw.List(nw.Int32())).list.mean().to_list() assert result[0] == 2.75 diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py index cc253b53f1..760f817ba7 100644 --- a/tests/expr_and_series/list/median_test.py +++ b/tests/expr_and_series/list/median_test.py @@ -5,6 +5,7 @@ import pytest import narwhals as nw +from tests.utils import PANDAS_VERSION if TYPE_CHECKING: from tests.utils import Constructor, ConstructorEager @@ -18,6 +19,10 @@ def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) - for backend in ("dask", "modin", "cudf", "sqlframe", "ibis") ): request.applymarker(pytest.mark.xfail) + if "pandas" in str(constructor): + if PANDAS_VERSION < (2, 2): + pytest.skip() + pytest.importorskip("pyarrow") result = ( nw.from_native(constructor(data)) .select(nw.col("a").cast(nw.List(nw.Int32())).list.median()) @@ -34,6 +39,10 @@ def test_median_series( ) -> None: if any(backend in str(constructor_eager) for backend in ("modin", "cudf")): request.applymarker(pytest.mark.xfail) + if "pandas" in str(constructor_eager): + if PANDAS_VERSION < (2, 2): + pytest.skip() + pytest.importorskip("pyarrow") df = nw.from_native(constructor_eager(data), eager_only=True) result = df["a"].cast(nw.List(nw.Int32())).list.median().to_list() assert result[0] == 2.5 diff --git a/tests/expr_and_series/list/min_test.py b/tests/expr_and_series/list/min_test.py index fc0df66805..ab98f066b6 100644 --- a/tests/expr_and_series/list/min_test.py +++ b/tests/expr_and_series/list/min_test.py @@ -5,6 +5,7 @@ import pytest import narwhals as nw +from tests.utils import PANDAS_VERSION if TYPE_CHECKING: from tests.utils import Constructor, ConstructorEager @@ -15,6 +16,10 @@ def test_min_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: if any(backend in str(constructor) for backend in ("dask", "modin", "cudf")): request.applymarker(pytest.mark.xfail) + if "pandas" in 
str(constructor): + if PANDAS_VERSION < (2, 2): + pytest.skip() + pytest.importorskip("pyarrow") result = ( nw.from_native(constructor(data)) .select(nw.col("a").cast(nw.List(nw.Int32())).list.min()) @@ -31,6 +36,10 @@ def test_min_series( ) -> None: if any(backend in str(constructor_eager) for backend in ("modin", "cudf")): request.applymarker(pytest.mark.xfail) + if "pandas" in str(constructor_eager): + if PANDAS_VERSION < (2, 2): + pytest.skip() + pytest.importorskip("pyarrow") df = nw.from_native(constructor_eager(data), eager_only=True) result = df["a"].cast(nw.List(nw.Int32())).list.min().to_list() assert result[0] == 2 diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py index d1f82ce622..e68a1720e7 100644 --- a/tests/expr_and_series/list/sum_test.py +++ b/tests/expr_and_series/list/sum_test.py @@ -5,6 +5,7 @@ import pytest import narwhals as nw +from tests.utils import PANDAS_VERSION if TYPE_CHECKING: from tests.utils import Constructor, ConstructorEager @@ -18,6 +19,10 @@ def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> N ): # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 request.applymarker(pytest.mark.xfail) + if "pandas" in str(constructor): + if PANDAS_VERSION < (2, 2): + pytest.skip() + pytest.importorskip("pyarrow") result = ( nw.from_native(constructor(data)) .select(nw.col("a").cast(nw.List(nw.Int32())).list.sum()) @@ -34,6 +39,10 @@ def test_sum_series( ) -> None: if any(backend in str(constructor_eager) for backend in ("modin", "cudf")): request.applymarker(pytest.mark.xfail) + if "pandas" in str(constructor_eager): + if PANDAS_VERSION < (2, 2): + pytest.skip() + pytest.importorskip("pyarrow") df = nw.from_native(constructor_eager(data), eager_only=True) result = df["a"].cast(nw.List(nw.Int32())).list.sum().to_list() assert result[0] == 11 From 9b4555537ae20d987d8432d69ea6431837fe6a83 Mon Sep 17 00:00:00 2001 From: raisadz 
<34237447+raisadz@users.noreply.github.com> Date: Fri, 28 Nov 2025 11:38:00 +0000 Subject: [PATCH 03/34] add the new methods to the polars list namespace --- narwhals/_polars/utils.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/narwhals/_polars/utils.py b/narwhals/_polars/utils.py index d638c791fd..f2081f99dc 100644 --- a/narwhals/_polars/utils.py +++ b/narwhals/_polars/utils.py @@ -362,6 +362,16 @@ def len(self) -> CompliantT: ... unique: Method[CompliantT] + max: Method[CompliantT] + + mean: Method[CompliantT] + + median: Method[CompliantT] + + min: Method[CompliantT] + + sum: Method[CompliantT] + class PolarsStructNamespace(PolarsAnyNamespace[CompliantT, NativeT_co]): _accessor: ClassVar[Accessor] = "struct" From 7bdd2d2c4a6cc3c95b327b8ad44e791e8af16ce4 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Fri, 28 Nov 2025 11:53:25 +0000 Subject: [PATCH 04/34] xfail old polars for median and pyspark for non implemented methods --- tests/expr_and_series/list/mean_test.py | 3 ++- tests/expr_and_series/list/median_test.py | 10 ++++++---- tests/expr_and_series/list/sum_test.py | 3 ++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/expr_and_series/list/mean_test.py b/tests/expr_and_series/list/mean_test.py index 4f05090b81..ba77450fb9 100644 --- a/tests/expr_and_series/list/mean_test.py +++ b/tests/expr_and_series/list/mean_test.py @@ -15,7 +15,8 @@ def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: if any( - backend in str(constructor) for backend in ("dask", "modin", "cudf", "sqlframe") + backend in str(constructor) + for backend in ("dask", "modin", "cudf", "sqlframe", "pyspark") ): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor): diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py index 760f817ba7..d08516551d 100644 --- a/tests/expr_and_series/list/median_test.py +++ 
b/tests/expr_and_series/list/median_test.py @@ -5,7 +5,7 @@ import pytest import narwhals as nw -from tests.utils import PANDAS_VERSION +from tests.utils import PANDAS_VERSION, POLARS_VERSION if TYPE_CHECKING: from tests.utils import Constructor, ConstructorEager @@ -16,8 +16,8 @@ def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: if any( backend in str(constructor) - for backend in ("dask", "modin", "cudf", "sqlframe", "ibis") - ): + for backend in ("dask", "modin", "cudf", "sqlframe", "ibis", "pyspark") + ) or ("polars" in str(constructor) and POLARS_VERSION < (0, 20, 7)): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor): if PANDAS_VERSION < (2, 2): @@ -37,7 +37,9 @@ def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) - def test_median_series( request: pytest.FixtureRequest, constructor_eager: ConstructorEager ) -> None: - if any(backend in str(constructor_eager) for backend in ("modin", "cudf")): + if any(backend in str(constructor_eager) for backend in ("modin", "cudf")) or ( + "polars" in str(constructor_eager) and POLARS_VERSION < (0, 20, 7) + ): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor_eager): if PANDAS_VERSION < (2, 2): diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py index e68a1720e7..9760a09c71 100644 --- a/tests/expr_and_series/list/sum_test.py +++ b/tests/expr_and_series/list/sum_test.py @@ -15,7 +15,8 @@ def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: if any( - backend in str(constructor) for backend in ("dask", "modin", "cudf", "sqlframe") + backend in str(constructor) + for backend in ("dask", "modin", "cudf", "sqlframe", "pyspark") ): # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 request.applymarker(pytest.mark.xfail) From 74934c0c8dbea3caece6904fc63628e13ce73906 Mon Sep 17 00:00:00 2001 From: raisadz 
<34237447+raisadz@users.noreply.github.com> Date: Fri, 28 Nov 2025 11:56:52 +0000 Subject: [PATCH 05/34] unxfail modin --- tests/expr_and_series/list/max_test.py | 6 ++---- tests/expr_and_series/list/mean_test.py | 5 ++--- tests/expr_and_series/list/median_test.py | 4 ++-- tests/expr_and_series/list/min_test.py | 4 ++-- tests/expr_and_series/list/sum_test.py | 5 ++--- 5 files changed, 10 insertions(+), 14 deletions(-) diff --git a/tests/expr_and_series/list/max_test.py b/tests/expr_and_series/list/max_test.py index 7b9fe5ac4a..8f112410cd 100644 --- a/tests/expr_and_series/list/max_test.py +++ b/tests/expr_and_series/list/max_test.py @@ -14,9 +14,7 @@ def test_max_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: - if any( - backend in str(constructor) for backend in ("dask", "modin", "cudf", "sqlframe") - ): + if any(backend in str(constructor) for backend in ("dask", "cudf", "sqlframe")): # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor): @@ -37,7 +35,7 @@ def test_max_expr(request: pytest.FixtureRequest, constructor: Constructor) -> N def test_max_series( request: pytest.FixtureRequest, constructor_eager: ConstructorEager ) -> None: - if any(backend in str(constructor_eager) for backend in ("modin", "cudf")): + if any(backend in str(constructor_eager) for backend in ("cudf",)): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor_eager): if PANDAS_VERSION < (2, 2): diff --git a/tests/expr_and_series/list/mean_test.py b/tests/expr_and_series/list/mean_test.py index ba77450fb9..cbe09f1b64 100644 --- a/tests/expr_and_series/list/mean_test.py +++ b/tests/expr_and_series/list/mean_test.py @@ -15,8 +15,7 @@ def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: if any( - backend in str(constructor) - for backend in ("dask", "modin", "cudf", "sqlframe", "pyspark") + backend in str(constructor) for backend in 
("dask", "cudf", "sqlframe", "pyspark") ): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor): @@ -37,7 +36,7 @@ def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> def test_mean_series( request: pytest.FixtureRequest, constructor_eager: ConstructorEager ) -> None: - if any(backend in str(constructor_eager) for backend in ("modin", "cudf")): + if any(backend in str(constructor_eager) for backend in ("cudf",)): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor_eager): if PANDAS_VERSION < (2, 2): diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py index d08516551d..d9e577f5ad 100644 --- a/tests/expr_and_series/list/median_test.py +++ b/tests/expr_and_series/list/median_test.py @@ -16,7 +16,7 @@ def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: if any( backend in str(constructor) - for backend in ("dask", "modin", "cudf", "sqlframe", "ibis", "pyspark") + for backend in ("dask", "cudf", "sqlframe", "ibis", "pyspark") ) or ("polars" in str(constructor) and POLARS_VERSION < (0, 20, 7)): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor): @@ -37,7 +37,7 @@ def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) - def test_median_series( request: pytest.FixtureRequest, constructor_eager: ConstructorEager ) -> None: - if any(backend in str(constructor_eager) for backend in ("modin", "cudf")) or ( + if any(backend in str(constructor_eager) for backend in ("cudf",)) or ( "polars" in str(constructor_eager) and POLARS_VERSION < (0, 20, 7) ): request.applymarker(pytest.mark.xfail) diff --git a/tests/expr_and_series/list/min_test.py b/tests/expr_and_series/list/min_test.py index ab98f066b6..d2655c00a1 100644 --- a/tests/expr_and_series/list/min_test.py +++ b/tests/expr_and_series/list/min_test.py @@ -14,7 +14,7 @@ def test_min_expr(request: pytest.FixtureRequest, constructor: 
Constructor) -> None: - if any(backend in str(constructor) for backend in ("dask", "modin", "cudf")): + if any(backend in str(constructor) for backend in ("dask", "cudf")): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor): if PANDAS_VERSION < (2, 2): @@ -34,7 +34,7 @@ def test_min_expr(request: pytest.FixtureRequest, constructor: Constructor) -> N def test_min_series( request: pytest.FixtureRequest, constructor_eager: ConstructorEager ) -> None: - if any(backend in str(constructor_eager) for backend in ("modin", "cudf")): + if any(backend in str(constructor_eager) for backend in ("cudf",)): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor_eager): if PANDAS_VERSION < (2, 2): diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py index 9760a09c71..fedd51d639 100644 --- a/tests/expr_and_series/list/sum_test.py +++ b/tests/expr_and_series/list/sum_test.py @@ -15,8 +15,7 @@ def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: if any( - backend in str(constructor) - for backend in ("dask", "modin", "cudf", "sqlframe", "pyspark") + backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "pyspark") ): # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 request.applymarker(pytest.mark.xfail) @@ -38,7 +37,7 @@ def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> N def test_sum_series( request: pytest.FixtureRequest, constructor_eager: ConstructorEager ) -> None: - if any(backend in str(constructor_eager) for backend in ("modin", "cudf")): + if any(backend in str(constructor_eager) for backend in ("cudf",)): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor_eager): if PANDAS_VERSION < (2, 2): From 7efe2d22607fba4919138a136cd3ef9978af60b0 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Fri, 28 Nov 2025 12:04:55 +0000 Subject: [PATCH 06/34] add no 
cover for non-pyarrow backends --- narwhals/_pandas_like/series_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_pandas_like/series_list.py b/narwhals/_pandas_like/series_list.py index 04f424e14d..d1fb085262 100644 --- a/narwhals/_pandas_like/series_list.py +++ b/narwhals/_pandas_like/series_list.py @@ -49,7 +49,7 @@ def _agg( dtype_backend = get_dtype_backend( self.native.dtype, self.compliant._implementation ) - if dtype_backend != "pyarrow": + if dtype_backend != "pyarrow": # pragma: no cover msg = "Only pyarrow backend is currently supported." raise NotImplementedError(msg) From 8e04fc1ec19ea1f623487982b9bfa032c2b37b37 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Fri, 28 Nov 2025 12:24:30 +0000 Subject: [PATCH 07/34] link pyspark and ibis issues --- tests/expr_and_series/list/mean_test.py | 1 + tests/expr_and_series/list/median_test.py | 2 ++ tests/expr_and_series/list/sum_test.py | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/expr_and_series/list/mean_test.py b/tests/expr_and_series/list/mean_test.py index cbe09f1b64..f707a3183a 100644 --- a/tests/expr_and_series/list/mean_test.py +++ b/tests/expr_and_series/list/mean_test.py @@ -17,6 +17,7 @@ def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> if any( backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "pyspark") ): + # PySpark issue: https://issues.apache.org/jira/browse/SPARK-54382 request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor): if PANDAS_VERSION < (2, 2): diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py index d9e577f5ad..6c0f863782 100644 --- a/tests/expr_and_series/list/median_test.py +++ b/tests/expr_and_series/list/median_test.py @@ -18,6 +18,8 @@ def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) - backend in str(constructor) for backend in ("dask", 
"cudf", "sqlframe", "ibis", "pyspark") ) or ("polars" in str(constructor) and POLARS_VERSION < (0, 20, 7)): + # PySpark issue: https://issues.apache.org/jira/browse/SPARK-54382 + # ibis issue: https://github.com/ibis-project/ibis/issues/11788 request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor): if PANDAS_VERSION < (2, 2): diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py index fedd51d639..0ddbe4c9b3 100644 --- a/tests/expr_and_series/list/sum_test.py +++ b/tests/expr_and_series/list/sum_test.py @@ -17,7 +17,7 @@ def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> N if any( backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "pyspark") ): - # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 + # PySpark issue: https://issues.apache.org/jira/browse/SPARK-54382 request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor): if PANDAS_VERSION < (2, 2): From 7ab1ebf5bd536cc687b32bda277cb98f16072c98 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Fri, 28 Nov 2025 14:20:45 +0000 Subject: [PATCH 08/34] add sum/mean/median for PySpark --- narwhals/_spark_like/expr_list.py | 32 ++++++++++++++++++++--- tests/expr_and_series/list/mean_test.py | 6 ++--- tests/expr_and_series/list/median_test.py | 5 ++-- tests/expr_and_series/list/sum_test.py | 6 ++--- 4 files changed, 34 insertions(+), 15 deletions(-) diff --git a/narwhals/_spark_like/expr_list.py b/narwhals/_spark_like/expr_list.py index f8ee83dfe5..3a888f88a7 100644 --- a/narwhals/_spark_like/expr_list.py +++ b/narwhals/_spark_like/expr_list.py @@ -1,10 +1,10 @@ from __future__ import annotations +import operator from typing import TYPE_CHECKING from narwhals._compliant import LazyExprNamespace from narwhals._compliant.any_namespace import ListNamespace -from narwhals._utils import not_implemented if TYPE_CHECKING: from sqlframe.base.column import Column @@ 
-49,6 +49,30 @@ def func(expr: Column) -> Column: return self.compliant._with_elementwise(func) - mean = not_implemented() - median = not_implemented() - sum = not_implemented() + def sum(self) -> SparkLikeExpr: + def func(expr: Column) -> Column: + F = self.compliant._F + return F.aggregate(F.array_compact(expr), F.lit(0.0), operator.add) + + return self.compliant._with_elementwise(func) + + def mean(self) -> SparkLikeExpr: + def func(expr: Column) -> Column: + F = self.compliant._F + return F.aggregate( + F.array_compact(expr), F.lit(0.0), operator.add + ) / F.array_size(F.array_compact(expr)) + + return self.compliant._with_elementwise(func) + + def median(self) -> SparkLikeExpr: + def func(expr: Column) -> Column: + F = self.compliant._F + sorted_expr = F.array_compact(F.sort_array(expr)) + size = F.array_size(sorted_expr) + mid_index = (size / 2).cast("int") + odd_case = sorted_expr[mid_index] + even_case = (sorted_expr[mid_index] - 1 + sorted_expr[mid_index]) / 2 + return F.when(size % 2 == 1, odd_case).otherwise(even_case) + + return self.compliant._with_elementwise(func) diff --git a/tests/expr_and_series/list/mean_test.py b/tests/expr_and_series/list/mean_test.py index f707a3183a..1c0551df71 100644 --- a/tests/expr_and_series/list/mean_test.py +++ b/tests/expr_and_series/list/mean_test.py @@ -14,10 +14,8 @@ def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: - if any( - backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "pyspark") - ): - # PySpark issue: https://issues.apache.org/jira/browse/SPARK-54382 + if any(backend in str(constructor) for backend in ("dask", "cudf", "sqlframe")): + # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor): if PANDAS_VERSION < (2, 2): diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py index 6c0f863782..256e1f8111 100644 --- 
a/tests/expr_and_series/list/median_test.py +++ b/tests/expr_and_series/list/median_test.py @@ -15,10 +15,9 @@ def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: if any( - backend in str(constructor) - for backend in ("dask", "cudf", "sqlframe", "ibis", "pyspark") + backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "ibis") ) or ("polars" in str(constructor) and POLARS_VERSION < (0, 20, 7)): - # PySpark issue: https://issues.apache.org/jira/browse/SPARK-54382 + # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 # ibis issue: https://github.com/ibis-project/ibis/issues/11788 request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor): diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py index 0ddbe4c9b3..d72b321164 100644 --- a/tests/expr_and_series/list/sum_test.py +++ b/tests/expr_and_series/list/sum_test.py @@ -14,10 +14,8 @@ def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: - if any( - backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "pyspark") - ): - # PySpark issue: https://issues.apache.org/jira/browse/SPARK-54382 + if any(backend in str(constructor) for backend in ("dask", "cudf", "sqlframe")): + # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor): if PANDAS_VERSION < (2, 2): From 6b5780938a82afcb1564b58a8047a3526f1b917a Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Fri, 28 Nov 2025 14:30:44 +0000 Subject: [PATCH 09/34] xfail pyspark[connect], add no cover for sqlframe --- narwhals/_spark_like/expr_list.py | 3 ++- tests/expr_and_series/list/median_test.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/narwhals/_spark_like/expr_list.py b/narwhals/_spark_like/expr_list.py index 3a888f88a7..454e088244 100644 --- 
a/narwhals/_spark_like/expr_list.py +++ b/narwhals/_spark_like/expr_list.py @@ -66,7 +66,8 @@ def func(expr: Column) -> Column: return self.compliant._with_elementwise(func) def median(self) -> SparkLikeExpr: - def func(expr: Column) -> Column: + def func(expr: Column) -> Column: # pragma: no cover + # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 F = self.compliant._F sorted_expr = F.array_compact(F.sort_array(expr)) size = F.array_size(sorted_expr) diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py index 256e1f8111..2af71c8083 100644 --- a/tests/expr_and_series/list/median_test.py +++ b/tests/expr_and_series/list/median_test.py @@ -15,7 +15,8 @@ def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: if any( - backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "ibis") + backend in str(constructor) + for backend in ("dask", "cudf", "sqlframe", "ibis", "pyspark[connect]") ) or ("polars" in str(constructor) and POLARS_VERSION < (0, 20, 7)): # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 # ibis issue: https://github.com/ibis-project/ibis/issues/11788 From 7dfc9fa9e87918f563e3eb8cffee5e5b59639136 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Fri, 28 Nov 2025 14:40:50 +0000 Subject: [PATCH 10/34] xfail pyspark connect --- tests/expr_and_series/list/median_test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py index 2af71c8083..b15c674635 100644 --- a/tests/expr_and_series/list/median_test.py +++ b/tests/expr_and_series/list/median_test.py @@ -1,5 +1,6 @@ from __future__ import annotations +import os from typing import TYPE_CHECKING import pytest @@ -15,12 +16,13 @@ def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: if any( - backend in 
str(constructor) - for backend in ("dask", "cudf", "sqlframe", "ibis", "pyspark[connect]") + backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "ibis") ) or ("polars" in str(constructor) and POLARS_VERSION < (0, 20, 7)): # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 # ibis issue: https://github.com/ibis-project/ibis/issues/11788 request.applymarker(pytest.mark.xfail) + if os.environ.get("SPARK_CONNECT", None) and "pyspark" in str(constructor): + request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor): if PANDAS_VERSION < (2, 2): pytest.skip() From 1243980ed0d1b638ab9bdc702be5065334a38007 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Sat, 29 Nov 2025 12:25:15 +0000 Subject: [PATCH 11/34] handle empty lists for pyarrow, tests for empty lists --- narwhals/_arrow/utils.py | 6 ++++-- tests/expr_and_series/list/sum_test.py | 23 ++++++++++++++++------- tests/expr_and_series/lit_test.py | 2 +- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index 6ca76eb239..94f046ffd8 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -501,12 +501,14 @@ def list_agg( array: ChunkedArrayAny, func: Literal["min", "max", "mean", "approximate_median", "sum"], ) -> ChunkedArrayAny: - return ( + agg = pa.array( pa.Table.from_arrays( [pc.list_flatten(array), pc.list_parent_indices(array)], names=["values", "offsets"], ) .group_by("offsets") - .aggregate([("values", func)]) + .aggregate([("values", func, pc.CountOptions("all"))]) .column(f"values_{func}") ) + non_empty_mask = pa.array(pc.not_equal(pc.list_value_length(array), 0)) # type: ignore[type-var] + return pa.chunked_array([pc.replace_with_mask([0] * len(array), non_empty_mask, agg)]) diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py index d72b321164..ef804e84ce 100644 --- a/tests/expr_and_series/list/sum_test.py 
+++ b/tests/expr_and_series/list/sum_test.py @@ -10,10 +10,15 @@ if TYPE_CHECKING: from tests.utils import Constructor, ConstructorEager -data = {"a": [[3, 2, 2, 4, None], [-1]]} +data = {"a": [[3, None, 2, 2, 4, None], [], [-1], [None, None, None], []]} -def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: +@pytest.mark.parametrize( + ("index", "expected"), [(0, 11), (1, 0), (2, -1), (3, 0), (4, 0)] +) +def test_sum_expr( + request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: int +) -> None: if any(backend in str(constructor) for backend in ("dask", "cudf", "sqlframe")): # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 request.applymarker(pytest.mark.xfail) @@ -28,12 +33,17 @@ def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> N .collect()["a"] .to_list() ) - assert result[0] == 11 - assert result[1] == -1 + assert result[index] == expected +@pytest.mark.parametrize( + ("index", "expected"), [(0, 11), (1, 0), (2, -1), (3, 0), (4, 0)] +) def test_sum_series( - request: pytest.FixtureRequest, constructor_eager: ConstructorEager + request: pytest.FixtureRequest, + constructor_eager: ConstructorEager, + index: int, + expected: int, ) -> None: if any(backend in str(constructor_eager) for backend in ("cudf",)): request.applymarker(pytest.mark.xfail) @@ -43,5 +53,4 @@ def test_sum_series( pytest.importorskip("pyarrow") df = nw.from_native(constructor_eager(data), eager_only=True) result = df["a"].cast(nw.List(nw.Int32())).list.sum().to_list() - assert result[0] == 11 - assert result[1] == -1 + assert result[index] == expected diff --git a/tests/expr_and_series/lit_test.py b/tests/expr_and_series/lit_test.py index 4a23ab9629..f742d46092 100644 --- a/tests/expr_and_series/lit_test.py +++ b/tests/expr_and_series/lit_test.py @@ -19,7 +19,7 @@ from narwhals.dtypes import DType -@pytest.mark.parametrize( +@pytest.mark.rize( ("dtype", "expected_lit"), [(None, [2, 2, 2]), 
(nw.String, ["2", "2", "2"]), (nw.Float32, [2.0, 2.0, 2.0])], ) From 7eca29a6b41aca4df06ad4338bc9e10479d0cdb8 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Sat, 29 Nov 2025 12:38:07 +0000 Subject: [PATCH 12/34] undo typo --- tests/expr_and_series/lit_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/expr_and_series/lit_test.py b/tests/expr_and_series/lit_test.py index f742d46092..4a23ab9629 100644 --- a/tests/expr_and_series/lit_test.py +++ b/tests/expr_and_series/lit_test.py @@ -19,7 +19,7 @@ from narwhals.dtypes import DType -@pytest.mark.rize( +@pytest.mark.parametrize( ("dtype", "expected_lit"), [(None, [2, 2, 2]), (nw.String, ["2", "2", "2"]), (nw.Float32, [2.0, 2.0, 2.0])], ) From 545abf8ecc5da2e25c5f6bbf54527f06c6b421cc Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Sat, 29 Nov 2025 14:35:33 +0000 Subject: [PATCH 13/34] add None case --- narwhals/_arrow/utils.py | 5 ++++- tests/expr_and_series/list/sum_test.py | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index 94f046ffd8..a9d702d01b 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -511,4 +511,7 @@ def list_agg( .column(f"values_{func}") ) non_empty_mask = pa.array(pc.not_equal(pc.list_value_length(array), 0)) # type: ignore[type-var] - return pa.chunked_array([pc.replace_with_mask([0] * len(array), non_empty_mask, agg)]) + base_array = [None if x else 0 for x in non_empty_mask.is_null()] + return pa.chunked_array( + [pc.replace_with_mask(base_array, non_empty_mask.fill_null(False), agg)] + ) diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py index ef804e84ce..f206d75911 100644 --- a/tests/expr_and_series/list/sum_test.py +++ b/tests/expr_and_series/list/sum_test.py @@ -10,11 +10,11 @@ if TYPE_CHECKING: from tests.utils import Constructor, 
ConstructorEager -data = {"a": [[3, None, 2, 2, 4, None], [], [-1], [None, None, None], []]} +data = {"a": [[3, None, 2, 2, 4, None], [], [-1], None, [None, None, None], []]} @pytest.mark.parametrize( - ("index", "expected"), [(0, 11), (1, 0), (2, -1), (3, 0), (4, 0)] + ("index", "expected"), [(0, 11), (1, 0), (2, -1), (3, None), (4, 0), (5, 0)] ) def test_sum_expr( request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: int @@ -37,7 +37,7 @@ def test_sum_expr( @pytest.mark.parametrize( - ("index", "expected"), [(0, 11), (1, 0), (2, -1), (3, 0), (4, 0)] + ("index", "expected"), [(0, 11), (1, 0), (2, -1), (3, None), (4, 0), (5, 0)] ) def test_sum_series( request: pytest.FixtureRequest, From 74de5c635c36e32a80613daab9a28979046adcd7 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Sat, 29 Nov 2025 15:44:02 +0000 Subject: [PATCH 14/34] fix list_agg and tests --- narwhals/_arrow/utils.py | 7 +++++-- tests/expr_and_series/list/sum_test.py | 6 +++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index a9d702d01b..e968ad548d 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -507,11 +507,14 @@ def list_agg( names=["values", "offsets"], ) .group_by("offsets") - .aggregate([("values", func, pc.CountOptions("all"))]) + .aggregate([("values", func)]) + .sort_by("offsets") .column(f"values_{func}") ) + if func == "sum": + agg = agg.fill_null(lit(0)) # type: ignore[type-var] non_empty_mask = pa.array(pc.not_equal(pc.list_value_length(array), 0)) # type: ignore[type-var] - base_array = [None if x else 0 for x in non_empty_mask.is_null()] + base_array = pc.if_else(non_empty_mask.is_null(), None, 0) return pa.chunked_array( [pc.replace_with_mask(base_array, non_empty_mask.fill_null(False), agg)] ) diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py index f206d75911..9b97815c12 100644 --- 
a/tests/expr_and_series/list/sum_test.py +++ b/tests/expr_and_series/list/sum_test.py @@ -5,7 +5,7 @@ import pytest import narwhals as nw -from tests.utils import PANDAS_VERSION +from tests.utils import PANDAS_VERSION, assert_equal_data if TYPE_CHECKING: from tests.utils import Constructor, ConstructorEager @@ -33,7 +33,7 @@ def test_sum_expr( .collect()["a"] .to_list() ) - assert result[index] == expected + assert_equal_data({"a": [result[index]]}, {"a": [expected]}) @pytest.mark.parametrize( @@ -53,4 +53,4 @@ def test_sum_series( pytest.importorskip("pyarrow") df = nw.from_native(constructor_eager(data), eager_only=True) result = df["a"].cast(nw.List(nw.Int32())).list.sum().to_list() - assert result[index] == expected + assert_equal_data({"a": [result[index]]}, {"a": [expected]}) From 65072ffe31884f85c6e110a52dee5d6695fb7ce8 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Sun, 30 Nov 2025 11:21:20 +0000 Subject: [PATCH 15/34] adjust duckdb --- narwhals/_duckdb/expr_list.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/narwhals/_duckdb/expr_list.py b/narwhals/_duckdb/expr_list.py index 184f57252f..d20589e9ea 100644 --- a/narwhals/_duckdb/expr_list.py +++ b/narwhals/_duckdb/expr_list.py @@ -4,7 +4,7 @@ from narwhals._compliant import LazyExprNamespace from narwhals._compliant.any_namespace import ListNamespace -from narwhals._duckdb.utils import F, lit, when +from narwhals._duckdb.utils import F, col, lambda_expr, lit, when from narwhals._utils import requires if TYPE_CHECKING: @@ -54,4 +54,12 @@ def median(self) -> DuckDBExpr: return self.compliant._with_elementwise(lambda expr: F("list_median", expr)) def sum(self) -> DuckDBExpr: - return self.compliant._with_elementwise(lambda expr: F("list_sum", expr)) + def func(expr: Expression) -> Expression: + elem = col("_") + expr_no_nulls = F("list_filter", expr, lambda_expr(elem, elem.isnotnull())) + expr_sum = F("list_sum", expr_no_nulls) + 
return when(F("array_length", expr_no_nulls) == lit(0), lit(0)).otherwise( + expr_sum + ) + + return self.compliant._with_callable(func) From ca4c7944c8c65be3136647b9effbb8cb11a0c80c Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Sun, 30 Nov 2025 12:23:22 +0000 Subject: [PATCH 16/34] adjust pyarrow and tests --- narwhals/_arrow/utils.py | 12 ++++++++--- tests/expr_and_series/list/max_test.py | 25 +++++++++++++++-------- tests/expr_and_series/list/mean_test.py | 25 +++++++++++++++-------- tests/expr_and_series/list/median_test.py | 25 +++++++++++++++-------- tests/expr_and_series/list/min_test.py | 25 +++++++++++++++-------- tests/expr_and_series/list/sum_test.py | 6 +++--- 6 files changed, 80 insertions(+), 38 deletions(-) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index e968ad548d..f3bd421283 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -511,10 +511,16 @@ def list_agg( .sort_by("offsets") .column(f"values_{func}") ) + non_empty_mask = pa.array(pc.not_equal(pc.list_value_length(array), 0)) # type: ignore[type-var] if func == "sum": agg = agg.fill_null(lit(0)) # type: ignore[type-var] - non_empty_mask = pa.array(pc.not_equal(pc.list_value_length(array), 0)) # type: ignore[type-var] - base_array = pc.if_else(non_empty_mask.is_null(), None, 0) + base_array = pc.if_else(non_empty_mask.is_null(), None, 0) + else: + base_array = pc.if_else(non_empty_mask, 0, None) return pa.chunked_array( - [pc.replace_with_mask(base_array, non_empty_mask.fill_null(False), agg)] + [ + pc.replace_with_mask( + base_array.cast(agg.type), non_empty_mask.fill_null(False), agg + ) + ] ) diff --git a/tests/expr_and_series/list/max_test.py b/tests/expr_and_series/list/max_test.py index 8f112410cd..6b79c75c5e 100644 --- a/tests/expr_and_series/list/max_test.py +++ b/tests/expr_and_series/list/max_test.py @@ -5,15 +5,20 @@ import pytest import narwhals as nw -from tests.utils import PANDAS_VERSION +from 
tests.utils import PANDAS_VERSION, assert_equal_data if TYPE_CHECKING: from tests.utils import Constructor, ConstructorEager -data = {"a": [[3, 2, 2, 4, None], [-1]]} +data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]} -def test_max_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: +@pytest.mark.parametrize( + ("index", "expected"), [(0, 4), (1, -1), (2, None), (3, None), (4, None)] +) +def test_max_expr( + request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: int +) -> None: if any(backend in str(constructor) for backend in ("dask", "cudf", "sqlframe")): # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 request.applymarker(pytest.mark.xfail) @@ -28,12 +33,17 @@ def test_max_expr(request: pytest.FixtureRequest, constructor: Constructor) -> N .collect()["a"] .to_list() ) - assert result[0] == 4 - assert result[1] == -1 + assert_equal_data({"a": [result[index]]}, {"a": [expected]}) +@pytest.mark.parametrize( + ("index", "expected"), [(0, 4), (1, -1), (2, None), (3, None), (4, None)] +) def test_max_series( - request: pytest.FixtureRequest, constructor_eager: ConstructorEager + request: pytest.FixtureRequest, + constructor_eager: ConstructorEager, + index: int, + expected: int, ) -> None: if any(backend in str(constructor_eager) for backend in ("cudf",)): request.applymarker(pytest.mark.xfail) @@ -43,5 +53,4 @@ def test_max_series( pytest.importorskip("pyarrow") df = nw.from_native(constructor_eager(data), eager_only=True) result = df["a"].cast(nw.List(nw.Int32())).list.max().to_list() - assert result[0] == 4 - assert result[1] == -1 + assert_equal_data({"a": [result[index]]}, {"a": [expected]}) diff --git a/tests/expr_and_series/list/mean_test.py b/tests/expr_and_series/list/mean_test.py index 1c0551df71..d2c42be932 100644 --- a/tests/expr_and_series/list/mean_test.py +++ b/tests/expr_and_series/list/mean_test.py @@ -5,15 +5,20 @@ import pytest import narwhals as nw -from 
tests.utils import PANDAS_VERSION +from tests.utils import PANDAS_VERSION, assert_equal_data if TYPE_CHECKING: from tests.utils import Constructor, ConstructorEager -data = {"a": [[3, 2, 2, 4, None], [-1]]} +data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]} -def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: +@pytest.mark.parametrize( + ("index", "expected"), [(0, 2.75), (1, -1), (2, None), (3, None), (4, None)] +) +def test_mean_expr( + request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: float +) -> None: if any(backend in str(constructor) for backend in ("dask", "cudf", "sqlframe")): # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 request.applymarker(pytest.mark.xfail) @@ -28,12 +33,17 @@ def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> .collect()["a"] .to_list() ) - assert result[0] == 2.75 - assert result[1] == -1 + assert_equal_data({"a": [result[index]]}, {"a": [expected]}) +@pytest.mark.parametrize( + ("index", "expected"), [(0, 2.75), (1, -1), (2, None), (3, None), (4, None)] +) def test_mean_series( - request: pytest.FixtureRequest, constructor_eager: ConstructorEager + request: pytest.FixtureRequest, + constructor_eager: ConstructorEager, + index: int, + expected: float, ) -> None: if any(backend in str(constructor_eager) for backend in ("cudf",)): request.applymarker(pytest.mark.xfail) @@ -43,5 +53,4 @@ def test_mean_series( pytest.importorskip("pyarrow") df = nw.from_native(constructor_eager(data), eager_only=True) result = df["a"].cast(nw.List(nw.Int32())).list.mean().to_list() - assert result[0] == 2.75 - assert result[1] == -1 + assert_equal_data({"a": [result[index]]}, {"a": [expected]}) diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py index b15c674635..e2e0003c94 100644 --- a/tests/expr_and_series/list/median_test.py +++ 
b/tests/expr_and_series/list/median_test.py @@ -6,15 +6,20 @@ import pytest import narwhals as nw -from tests.utils import PANDAS_VERSION, POLARS_VERSION +from tests.utils import PANDAS_VERSION, POLARS_VERSION, assert_equal_data if TYPE_CHECKING: from tests.utils import Constructor, ConstructorEager -data = {"a": [[3, 2, 2, 4, None], [-1]]} +data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]} -def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: +@pytest.mark.parametrize( + ("index", "expected"), [(0, 2.5), (1, -1), (2, None), (3, None), (4, None)] +) +def test_median_expr( + request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: float +) -> None: if any( backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "ibis") ) or ("polars" in str(constructor) and POLARS_VERSION < (0, 20, 7)): @@ -34,12 +39,17 @@ def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) - .collect()["a"] .to_list() ) - assert result[0] == 2.5 - assert result[1] == -1 + assert_equal_data({"a": [result[index]]}, {"a": [expected]}) +@pytest.mark.parametrize( + ("index", "expected"), [(0, 2.5), (1, -1), (2, None), (3, None), (4, None)] +) def test_median_series( - request: pytest.FixtureRequest, constructor_eager: ConstructorEager + request: pytest.FixtureRequest, + constructor_eager: ConstructorEager, + index: int, + expected: float, ) -> None: if any(backend in str(constructor_eager) for backend in ("cudf",)) or ( "polars" in str(constructor_eager) and POLARS_VERSION < (0, 20, 7) @@ -51,5 +61,4 @@ def test_median_series( pytest.importorskip("pyarrow") df = nw.from_native(constructor_eager(data), eager_only=True) result = df["a"].cast(nw.List(nw.Int32())).list.median().to_list() - assert result[0] == 2.5 - assert result[1] == -1 + assert_equal_data({"a": [result[index]]}, {"a": [expected]}) diff --git a/tests/expr_and_series/list/min_test.py 
b/tests/expr_and_series/list/min_test.py index d2655c00a1..87d3fe1ca7 100644 --- a/tests/expr_and_series/list/min_test.py +++ b/tests/expr_and_series/list/min_test.py @@ -5,15 +5,20 @@ import pytest import narwhals as nw -from tests.utils import PANDAS_VERSION +from tests.utils import PANDAS_VERSION, assert_equal_data if TYPE_CHECKING: from tests.utils import Constructor, ConstructorEager -data = {"a": [[3, 2, 2, 4, None], [-1]]} +data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]} -def test_min_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: +@pytest.mark.parametrize( + ("index", "expected"), [(0, 2), (1, -1), (2, None), (3, None), (4, None)] +) +def test_min_expr( + request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: int +) -> None: if any(backend in str(constructor) for backend in ("dask", "cudf")): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor): @@ -27,12 +32,17 @@ def test_min_expr(request: pytest.FixtureRequest, constructor: Constructor) -> N .collect()["a"] .to_list() ) - assert result[0] == 2 - assert result[1] == -1 + assert_equal_data({"a": [result[index]]}, {"a": [expected]}) +@pytest.mark.parametrize( + ("index", "expected"), [(0, 2), (1, -1), (2, None), (3, None), (4, None)] +) def test_min_series( - request: pytest.FixtureRequest, constructor_eager: ConstructorEager + request: pytest.FixtureRequest, + constructor_eager: ConstructorEager, + index: int, + expected: int, ) -> None: if any(backend in str(constructor_eager) for backend in ("cudf",)): request.applymarker(pytest.mark.xfail) @@ -42,5 +52,4 @@ def test_min_series( pytest.importorskip("pyarrow") df = nw.from_native(constructor_eager(data), eager_only=True) result = df["a"].cast(nw.List(nw.Int32())).list.min().to_list() - assert result[0] == 2 - assert result[1] == -1 + assert_equal_data({"a": [result[index]]}, {"a": [expected]}) diff --git a/tests/expr_and_series/list/sum_test.py 
b/tests/expr_and_series/list/sum_test.py index 9b97815c12..f3ab2a12d6 100644 --- a/tests/expr_and_series/list/sum_test.py +++ b/tests/expr_and_series/list/sum_test.py @@ -10,11 +10,11 @@ if TYPE_CHECKING: from tests.utils import Constructor, ConstructorEager -data = {"a": [[3, None, 2, 2, 4, None], [], [-1], None, [None, None, None], []]} +data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]} @pytest.mark.parametrize( - ("index", "expected"), [(0, 11), (1, 0), (2, -1), (3, None), (4, 0), (5, 0)] + ("index", "expected"), [(0, 11), (1, -1), (2, None), (3, 0), (4, 0)] ) def test_sum_expr( request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: int @@ -37,7 +37,7 @@ def test_sum_expr( @pytest.mark.parametrize( - ("index", "expected"), [(0, 11), (1, 0), (2, -1), (3, None), (4, 0), (5, 0)] + ("index", "expected"), [(0, 11), (1, -1), (2, None), (3, 0), (4, 0)] ) def test_sum_series( request: pytest.FixtureRequest, From 3251865f53baf84915296070e1db72532bec0869 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Sun, 30 Nov 2025 12:38:35 +0000 Subject: [PATCH 17/34] add `try_divide` for pyspark mean, set min duckdb version for lambda_expr --- narwhals/_duckdb/expr_list.py | 1 + narwhals/_spark_like/expr_list.py | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/narwhals/_duckdb/expr_list.py b/narwhals/_duckdb/expr_list.py index d20589e9ea..15b3f70a53 100644 --- a/narwhals/_duckdb/expr_list.py +++ b/narwhals/_duckdb/expr_list.py @@ -53,6 +53,7 @@ def mean(self) -> DuckDBExpr: def median(self) -> DuckDBExpr: return self.compliant._with_elementwise(lambda expr: F("list_median", expr)) + @requires.backend_version((1, 2)) def sum(self) -> DuckDBExpr: def func(expr: Expression) -> Expression: elem = col("_") diff --git a/narwhals/_spark_like/expr_list.py b/narwhals/_spark_like/expr_list.py index 454e088244..2e5ee5a369 100644 --- a/narwhals/_spark_like/expr_list.py +++ 
b/narwhals/_spark_like/expr_list.py @@ -59,9 +59,10 @@ def func(expr: Column) -> Column: def mean(self) -> SparkLikeExpr: def func(expr: Column) -> Column: F = self.compliant._F - return F.aggregate( - F.array_compact(expr), F.lit(0.0), operator.add - ) / F.array_size(F.array_compact(expr)) + return F.try_divide( + F.aggregate(F.array_compact(expr), F.lit(0.0), operator.add), + F.array_size(F.array_compact(expr)), + ) return self.compliant._with_elementwise(func) From eff085cd9cabaaa752e6155e922571abe9d2109f Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Sun, 30 Nov 2025 12:48:09 +0000 Subject: [PATCH 18/34] xfail old duckdb for sum --- tests/expr_and_series/list/sum_test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py index f3ab2a12d6..90b78bac58 100644 --- a/tests/expr_and_series/list/sum_test.py +++ b/tests/expr_and_series/list/sum_test.py @@ -5,7 +5,7 @@ import pytest import narwhals as nw -from tests.utils import PANDAS_VERSION, assert_equal_data +from tests.utils import DUCKDB_VERSION, PANDAS_VERSION, assert_equal_data if TYPE_CHECKING: from tests.utils import Constructor, ConstructorEager @@ -26,6 +26,9 @@ def test_sum_expr( if PANDAS_VERSION < (2, 2): pytest.skip() pytest.importorskip("pyarrow") + if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 2): + reason = "version too old, duckdb 1.2 required for LambdaExpression." 
+ pytest.skip(reason=reason) result = ( nw.from_native(constructor(data)) .select(nw.col("a").cast(nw.List(nw.Int32())).list.sum()) From fdcf3f3e823f42079ef4cb108870ce20c27a7277 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Sun, 30 Nov 2025 14:03:10 +0000 Subject: [PATCH 19/34] fix typing --- narwhals/_arrow/utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index f3bd421283..511ee43a60 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -511,7 +511,7 @@ def list_agg( .sort_by("offsets") .column(f"values_{func}") ) - non_empty_mask = pa.array(pc.not_equal(pc.list_value_length(array), 0)) # type: ignore[type-var] + non_empty_mask = pa.array(pc.not_equal(pc.list_value_length(array), lit(0))) if func == "sum": agg = agg.fill_null(lit(0)) # type: ignore[type-var] base_array = pc.if_else(non_empty_mask.is_null(), None, 0) @@ -520,7 +520,9 @@ def list_agg( return pa.chunked_array( [ pc.replace_with_mask( - base_array.cast(agg.type), non_empty_mask.fill_null(False), agg + base_array.cast(agg.type), + non_empty_mask.fill_null(False), # pyright:ignore[reportArgumentType] + agg, ) ] ) From 146c458551d567f8dcfba746147803653a1cf512 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Sun, 30 Nov 2025 14:35:07 +0000 Subject: [PATCH 20/34] adjust pyspark median --- narwhals/_spark_like/expr_list.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/narwhals/_spark_like/expr_list.py b/narwhals/_spark_like/expr_list.py index 2e5ee5a369..15f566131c 100644 --- a/narwhals/_spark_like/expr_list.py +++ b/narwhals/_spark_like/expr_list.py @@ -75,6 +75,10 @@ def func(expr: Column) -> Column: # pragma: no cover mid_index = (size / 2).cast("int") odd_case = sorted_expr[mid_index] even_case = (sorted_expr[mid_index] - 1 + sorted_expr[mid_index]) / 2 - return F.when(size % 2 == 1,
odd_case).otherwise(even_case) + return ( + F.when((size.isNull()) | (size == 0), F.lit(None)) + .when(size % 2 == 1, odd_case) + .otherwise(even_case) + ) return self.compliant._with_elementwise(func) From 4d18654fc325ef9e305b83e3f77558468e78bf9b Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Sun, 30 Nov 2025 14:54:09 +0000 Subject: [PATCH 21/34] fix typing --- narwhals/_arrow/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index 511ee43a60..90204efcef 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -513,7 +513,7 @@ def list_agg( ) non_empty_mask = pa.array(pc.not_equal(pc.list_value_length(array), lit(0))) if func == "sum": - agg = agg.fill_null(lit(0)) # type: ignore[type-var] + agg = agg.fill_null(lit(0)) # pyright:ignore[reportArgumentType] base_array = pc.if_else(non_empty_mask.is_null(), None, 0) else: base_array = pc.if_else(non_empty_mask, 0, None) @@ -521,7 +521,7 @@ def list_agg( [ pc.replace_with_mask( base_array.cast(agg.type), - non_empty_mask.fill_null(False), # pyright:ignore[reportArgumentType] + non_empty_mask.fill_null(False), # type: ignore[arg-type] agg, ) ] From 997533482fb44134bcc8289722a512509a7bdd4d Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Mon, 1 Dec 2025 10:28:04 +0000 Subject: [PATCH 22/34] adjust ibis sum --- narwhals/_ibis/expr_list.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/narwhals/_ibis/expr_list.py b/narwhals/_ibis/expr_list.py index ff0cd76c08..64cc053831 100644 --- a/narwhals/_ibis/expr_list.py +++ b/narwhals/_ibis/expr_list.py @@ -2,6 +2,8 @@ from typing import TYPE_CHECKING +from ibis import cases, literal + from narwhals._compliant import LazyExprNamespace from narwhals._compliant.any_namespace import ListNamespace from narwhals._utils import not_implemented @@ -39,6 +41,15 @@ def mean(self) -> 
IbisExpr: return self.compliant._with_callable(lambda expr: expr.means()) def sum(self) -> IbisExpr: - return self.compliant._with_callable(lambda expr: expr.sums()) + def func(expr: ir.ArrayColumn) -> ir.Value: + expr_no_nulls = expr.filter(lambda x: x.notnull()) + len = expr_no_nulls.length() + return cases( + (len.isnull(), literal(None)), + (len == literal(0), literal(0)), + else_=expr.sums(), + ) + + return self.compliant._with_callable(func) median = not_implemented() From be000f5584f74019f95ec48e8cf1d9bf0331e374 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Mon, 1 Dec 2025 11:41:54 +0000 Subject: [PATCH 23/34] mix docstrings, add a test where there is a mismatch for median --- narwhals/expr_list.py | 64 ++++++++++------------- narwhals/series_list.py | 30 ++++++----- tests/expr_and_series/list/median_test.py | 24 +++++++-- 3 files changed, 64 insertions(+), 54 deletions(-) diff --git a/narwhals/expr_list.py b/narwhals/expr_list.py index 94e4c4f38d..dcc54d4404 100644 --- a/narwhals/expr_list.py +++ b/narwhals/expr_list.py @@ -148,22 +148,20 @@ def min(self) -> ExprT: """Compute the min value of the lists in the array. 
Examples: - >>> import polars as pl + >>> import duckdb >>> import narwhals as nw - >>> df_native = pl.DataFrame({"a": [[1], [3, 4, None]]}) + >>> df_native = duckdb.sql("SELECT * FROM VALUES ([1]), ([3, 4, NULL]) df(a)") >>> df = nw.from_native(df_native) >>> df.with_columns(a_min=nw.col("a").list.min()) ┌────────────────────────┐ - | Narwhals DataFrame | + | Narwhals LazyFrame | |------------------------| - |shape: (2, 2) | |┌──────────────┬───────┐| - |│ a ┆ a_min │| - |│ --- ┆ --- │| - |│ list[i64] ┆ i64 │| - |╞══════════════╪═══════╡| - |│ [1] ┆ 1 │| - |│ [3, 4, null] ┆ 3 │| + |│ a │ a_min │| + |│ int32[] │ int32 │| + |├──────────────┼───────┤| + |│ [1] │ 1 │| + |│ [3, 4, NULL] │ 3 │| |└──────────────┴───────┘| └────────────────────────┘ """ @@ -198,24 +196,22 @@ def mean(self) -> ExprT: """Compute the mean value of the lists in the array. Examples: - >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw - >>> df_native = pl.DataFrame({"a": [[1], [3, 4, None]]}) + >>> df_native = pa.table({"a": [[1], [3, 4, None]]}) >>> df = nw.from_native(df_native) >>> df.with_columns(a_mean=nw.col("a").list.mean()) - ┌─────────────────────────┐ - | Narwhals DataFrame | - |-------------------------| - |shape: (2, 2) | - |┌──────────────┬────────┐| - |│ a ┆ a_mean │| - |│ --- ┆ --- │| - |│ list[i64] ┆ f64 │| - |╞══════════════╪════════╡| - |│ [1] ┆ 1.0 │| - |│ [3, 4, null] ┆ 3.5 │| - |└──────────────┴────────┘| - └─────────────────────────┘ + ┌──────────────────────┐ + | Narwhals DataFrame | + |----------------------| + |pyarrow.Table | + |a: list | + | child 0, item: int64| + |a_mean: double | + |---- | + |a: [[[1],[3,4,null]]] | + |a_mean: [[1,3.5]] | + └──────────────────────┘ """ return self._expr._append_node(ExprNode(ExprKind.ELEMENTWISE, "list.mean")) @@ -223,22 +219,20 @@ def median(self) -> ExprT: """Compute the median value of the lists in the array. 
Examples: - >>> import polars as pl + >>> import duckdb >>> import narwhals as nw - >>> df_native = pl.DataFrame({"a": [[1], [3, 4, None]]}) + >>> df_native = duckdb.sql("SELECT * FROM VALUES ([1]), ([3, 4, NULL]) df(a)") >>> df = nw.from_native(df_native) >>> df.with_columns(a_median=nw.col("a").list.median()) ┌───────────────────────────┐ - | Narwhals DataFrame | + | Narwhals LazyFrame | |---------------------------| - |shape: (2, 2) | |┌──────────────┬──────────┐| - |│ a ┆ a_median │| - |│ --- ┆ --- │| - |│ list[i64] ┆ f64 │| - |╞══════════════╪══════════╡| - |│ [1] ┆ 1.0 │| - |│ [3, 4, null] ┆ 3.5 │| + |│ a │ a_median │| + |│ int32[] │ double │| + |├──────────────┼──────────┤| + |│ [1] │ 1.0 │| + |│ [3, 4, NULL] │ 3.5 │| |└──────────────┴──────────┘| └───────────────────────────┘ """ diff --git a/narwhals/series_list.py b/narwhals/series_list.py index a771ff1c59..d5ead244e1 100644 --- a/narwhals/series_list.py +++ b/narwhals/series_list.py @@ -142,16 +142,17 @@ def max(self) -> SeriesT: """Compute the max value of the lists in the array. Examples: - >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw - >>> s_native = pl.Series([[1], [3, 4, None]]) + >>> s_native = pa.chunked_array([[[1], [3, 4, None]]]) >>> s = nw.from_native(s_native, series_only=True) - >>> s.list.max().to_native() # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [i64] + >>> s.list.max().to_native() # doctest: +ELLIPSIS + [ - 1 - 4 + [ + 1, + 4 + ] ] """ return self._narwhals_series._with_compliant( @@ -182,16 +183,17 @@ def median(self) -> SeriesT: """Compute the median value of the lists in the array. 
Examples: - >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw - >>> s_native = pl.Series([[1], [3, 4, None]]) + >>> s_native = pa.chunked_array([[[1], [3, 4, None]]]) >>> s = nw.from_native(s_native, series_only=True) - >>> s.list.median().to_native() # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [f64] + >>> s.list.median().to_native() # doctest: +ELLIPSIS + [ - 1.0 - 3.5 + [ + 1, + 3 + ] ] """ return self._narwhals_series._with_compliant( diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py index e2e0003c94..fc6f6fbad1 100644 --- a/tests/expr_and_series/list/median_test.py +++ b/tests/expr_and_series/list/median_test.py @@ -11,11 +11,11 @@ if TYPE_CHECKING: from tests.utils import Constructor, ConstructorEager -data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]} +data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], [], [3, 4, None]]} @pytest.mark.parametrize( - ("index", "expected"), [(0, 2.5), (1, -1), (2, None), (3, None), (4, None)] + ("index", "expected"), [(0, 2.5), (1, -1), (2, None), (3, None), (4, None), (5, 3.5)] ) def test_median_expr( request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: float @@ -39,11 +39,18 @@ def test_median_expr( .collect()["a"] .to_list() ) - assert_equal_data({"a": [result[index]]}, {"a": [expected]}) + if any( + backend in str(constructor) + for backend in ("pandas", "pyarrow", "pandas[pyarrow]") + ) and (index == 5): + # there is a mismatch as pyarrow uses an approximate median + assert_equal_data({"a": [result[index]]}, {"a": [3]}) + else: + assert_equal_data({"a": [result[index]]}, {"a": [expected]}) @pytest.mark.parametrize( - ("index", "expected"), [(0, 2.5), (1, -1), (2, None), (3, None), (4, None)] + ("index", "expected"), [(0, 2.5), (1, -1), (2, None), (3, None), (4, None), (5, 3.5)] ) def test_median_series( request: pytest.FixtureRequest, @@ -61,4 +68,11 @@ def 
test_median_series( pytest.importorskip("pyarrow") df = nw.from_native(constructor_eager(data), eager_only=True) result = df["a"].cast(nw.List(nw.Int32())).list.median().to_list() - assert_equal_data({"a": [result[index]]}, {"a": [expected]}) + if any( + backend in str(constructor_eager) + for backend in ("pandas", "pyarrow", "pandas[pyarrow]") + ) and (index == 5): + # there is a mismatch as pyarrow uses an approximate median + assert_equal_data({"a": [result[index]]}, {"a": [3]}) + else: + assert_equal_data({"a": [result[index]]}, {"a": [expected]}) From 9b53f03f598afa9e5cf1d1eab2b224a65e7009e1 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Mon, 1 Dec 2025 13:37:58 +0000 Subject: [PATCH 24/34] xfail median for pyarrow and python below 3.10 --- narwhals/_arrow/utils.py | 8 +++++++- tests/expr_and_series/list/median_test.py | 12 ++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index 90204efcef..44e799f978 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -1,5 +1,6 @@ from __future__ import annotations +import sys from functools import lru_cache from typing import TYPE_CHECKING, Any, cast @@ -501,6 +502,9 @@ def list_agg( array: ChunkedArrayAny, func: Literal["min", "max", "mean", "approximate_median", "sum"], ) -> ChunkedArrayAny: + if func == "approximate_median" and sys.version_info < (3, 10): + msg = f"The minimum supported Python version for {func}" + raise NotImplementedError(msg) agg = pa.array( pa.Table.from_arrays( [pc.list_flatten(array), pc.list_parent_indices(array)], @@ -516,7 +520,9 @@ def list_agg( agg = agg.fill_null(lit(0)) # pyright:ignore[reportArgumentType] base_array = pc.if_else(non_empty_mask.is_null(), None, 0) else: - base_array = pc.if_else(non_empty_mask, 0, None) + base_array = pc.if_else( + non_empty_mask, 0, None + ) # zero is just a placeholder which is replaced below return pa.chunked_array( [ 
pc.replace_with_mask( diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py index fc6f6fbad1..ce8db05d89 100644 --- a/tests/expr_and_series/list/median_test.py +++ b/tests/expr_and_series/list/median_test.py @@ -1,6 +1,7 @@ from __future__ import annotations import os +import sys from typing import TYPE_CHECKING import pytest @@ -26,12 +27,18 @@ def test_median_expr( # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 # ibis issue: https://github.com/ibis-project/ibis/issues/11788 request.applymarker(pytest.mark.xfail) + if any( + backend in str(constructor) + for backend in ("pandas", "pyarrow", "pandas[pyarrow]") + ) and (sys.version_info < (3, 10)): + request.applymarker(pytest.mark.xfail) if os.environ.get("SPARK_CONNECT", None) and "pyspark" in str(constructor): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor): if PANDAS_VERSION < (2, 2): pytest.skip() pytest.importorskip("pyarrow") + result = ( nw.from_native(constructor(data)) .select(nw.col("a").cast(nw.List(nw.Int32())).list.median()) @@ -62,6 +69,11 @@ def test_median_series( "polars" in str(constructor_eager) and POLARS_VERSION < (0, 20, 7) ): request.applymarker(pytest.mark.xfail) + if any( + backend in str(constructor_eager) + for backend in ("pandas", "pyarrow", "pandas[pyarrow]") + ) and (sys.version_info < (3, 10)): + request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor_eager): if PANDAS_VERSION < (2, 2): pytest.skip() From a5495766077953459893597857ac175d8b09d5b6 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Mon, 1 Dec 2025 13:45:07 +0000 Subject: [PATCH 25/34] add no cover --- narwhals/_arrow/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index 44e799f978..f4ebc57cd1 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -502,7 +502,7 @@ def list_agg( array: 
ChunkedArrayAny, func: Literal["min", "max", "mean", "approximate_median", "sum"], ) -> ChunkedArrayAny: - if func == "approximate_median" and sys.version_info < (3, 10): + if func == "approximate_median" and sys.version_info < (3, 10): # pragma: no cover msg = f"The minimum supported Python version for {func}" raise NotImplementedError(msg) agg = pa.array( From 76c70ff4efe5b62433e7434a6948d52f863288a2 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Mon, 1 Dec 2025 20:42:51 +0000 Subject: [PATCH 26/34] update the error msg --- narwhals/_arrow/utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index f4ebc57cd1..add6241147 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -502,8 +502,12 @@ def list_agg( array: ChunkedArrayAny, func: Literal["min", "max", "mean", "approximate_median", "sum"], ) -> ChunkedArrayAny: - if func == "approximate_median" and sys.version_info < (3, 10): # pragma: no cover - msg = f"The minimum supported Python version for {func}" + version = sys.version_info + if func == "approximate_median" and version < (3, 10): # pragma: no cover + msg = ( + f"The minimum supported Python version for {func} is 3.10." + f"\nGot: {version.major}.{version.minor}.{version.micro}." 
+ ) raise NotImplementedError(msg) agg = pa.array( pa.Table.from_arrays( From 54d7041b52d8750d43add10090eb1e653a97ef76 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Tue, 9 Dec 2025 11:13:33 +0000 Subject: [PATCH 27/34] Update narwhals/_spark_like/expr_list.py Co-authored-by: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com> --- narwhals/_spark_like/expr_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_spark_like/expr_list.py b/narwhals/_spark_like/expr_list.py index 15f566131c..1b2ec723bc 100644 --- a/narwhals/_spark_like/expr_list.py +++ b/narwhals/_spark_like/expr_list.py @@ -74,7 +74,7 @@ def func(expr: Column) -> Column: # pragma: no cover size = F.array_size(sorted_expr) mid_index = (size / 2).cast("int") odd_case = sorted_expr[mid_index] - even_case = (sorted_expr[mid_index] - 1 + sorted_expr[mid_index]) / 2 + even_case = (sorted_expr[mid_index - 1] + sorted_expr[mid_index]) / 2 return ( F.when((size.isNull()) | (size == 0), F.lit(None)) .when(size % 2 == 1, odd_case) From 47f5a4be41a987503247ac21663d6b6bba7cbc8c Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Tue, 9 Dec 2025 11:16:56 +0000 Subject: [PATCH 28/34] remove a minimum 3.10 python version --- narwhals/_arrow/utils.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index add6241147..169622d63d 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -1,6 +1,5 @@ from __future__ import annotations -import sys from functools import lru_cache from typing import TYPE_CHECKING, Any, cast @@ -502,13 +501,6 @@ def list_agg( array: ChunkedArrayAny, func: Literal["min", "max", "mean", "approximate_median", "sum"], ) -> ChunkedArrayAny: - version = sys.version_info - if func == "approximate_median" and version < (3, 10): # pragma: no cover - msg = ( - f"The minimum supported Python version for {func} is 
3.10." - f"\nGot: {version.major}.{version.minor}.{version.micro}." - ) - raise NotImplementedError(msg) agg = pa.array( pa.Table.from_arrays( [pc.list_flatten(array), pc.list_parent_indices(array)], From 9bcdebef6c20556054e37beb1bb4d1e13481d907 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Tue, 9 Dec 2025 11:23:13 +0000 Subject: [PATCH 29/34] remove xfail from tests --- tests/expr_and_series/list/median_test.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py index ce8db05d89..0d5e52666a 100644 --- a/tests/expr_and_series/list/median_test.py +++ b/tests/expr_and_series/list/median_test.py @@ -1,7 +1,6 @@ from __future__ import annotations import os -import sys from typing import TYPE_CHECKING import pytest @@ -27,11 +26,6 @@ def test_median_expr( # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 # ibis issue: https://github.com/ibis-project/ibis/issues/11788 request.applymarker(pytest.mark.xfail) - if any( - backend in str(constructor) - for backend in ("pandas", "pyarrow", "pandas[pyarrow]") - ) and (sys.version_info < (3, 10)): - request.applymarker(pytest.mark.xfail) if os.environ.get("SPARK_CONNECT", None) and "pyspark" in str(constructor): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor): @@ -69,11 +63,6 @@ def test_median_series( "polars" in str(constructor_eager) and POLARS_VERSION < (0, 20, 7) ): request.applymarker(pytest.mark.xfail) - if any( - backend in str(constructor_eager) - for backend in ("pandas", "pyarrow", "pandas[pyarrow]") - ) and (sys.version_info < (3, 10)): - request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor_eager): if PANDAS_VERSION < (2, 2): pytest.skip() From 3ab7639c49858b0e720bab8b3741b1cac30ac55e Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Fri, 12 Dec 2025 10:02:20 +0000 Subject: [PATCH 30/34] 
skip old Python on windows tests for median --- tests/expr_and_series/list/median_test.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py index 0d5e52666a..173eac7087 100644 --- a/tests/expr_and_series/list/median_test.py +++ b/tests/expr_and_series/list/median_test.py @@ -1,12 +1,13 @@ from __future__ import annotations import os +import sys from typing import TYPE_CHECKING import pytest import narwhals as nw -from tests.utils import PANDAS_VERSION, POLARS_VERSION, assert_equal_data +from tests.utils import PANDAS_VERSION, POLARS_VERSION, assert_equal_data, is_windows if TYPE_CHECKING: from tests.utils import Constructor, ConstructorEager @@ -32,7 +33,13 @@ def test_median_expr( if PANDAS_VERSION < (2, 2): pytest.skip() pytest.importorskip("pyarrow") - + if ( + any(backend in str(constructor) for backend in ("pandas", "pyarrow")) + and sys.version_info < (3, 10) + and is_windows + ): + reason = "The issue only affects old Python versions on Windows." + pytest.skip(reason=reason) result = ( nw.from_native(constructor(data)) .select(nw.col("a").cast(nw.List(nw.Int32())).list.median()) @@ -67,6 +74,13 @@ def test_median_series( if PANDAS_VERSION < (2, 2): pytest.skip() pytest.importorskip("pyarrow") + if ( + any(backend in str(constructor_eager) for backend in ("pandas", "pyarrow")) + and sys.version_info < (3, 10) + and is_windows + ): + reason = "The issue only affects old Python versions on Windows." 
+ pytest.skip(reason=reason) df = nw.from_native(constructor_eager(data), eager_only=True) result = df["a"].cast(nw.List(nw.Int32())).list.median().to_list() if any( From 687c4ae62c25f0b9b2f8ec7c8e31f0b5bce392f8 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Fri, 12 Dec 2025 10:11:28 +0000 Subject: [PATCH 31/34] add no cover --- tests/expr_and_series/list/median_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py index 173eac7087..801312a673 100644 --- a/tests/expr_and_series/list/median_test.py +++ b/tests/expr_and_series/list/median_test.py @@ -37,7 +37,7 @@ def test_median_expr( any(backend in str(constructor) for backend in ("pandas", "pyarrow")) and sys.version_info < (3, 10) and is_windows - ): + ): # pragma: no cover reason = "The issue only affects old Python versions on Windows." pytest.skip(reason=reason) result = ( @@ -78,7 +78,7 @@ def test_median_series( any(backend in str(constructor_eager) for backend in ("pandas", "pyarrow")) and sys.version_info < (3, 10) and is_windows - ): + ): # pragma: no cover reason = "The issue only affects old Python versions on Windows." 
pytest.skip(reason=reason) df = nw.from_native(constructor_eager(data), eager_only=True) From c851f10c70c5c26152052373fdd15fb303a7705c Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Sat, 13 Dec 2025 13:45:44 +0000 Subject: [PATCH 32/34] modify list_agg as suggested --- narwhals/_arrow/utils.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index 169622d63d..c46d400658 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -501,28 +501,32 @@ def list_agg( array: ChunkedArrayAny, func: Literal["min", "max", "mean", "approximate_median", "sum"], ) -> ChunkedArrayAny: + lit_: Incomplete = lit + aggregation = ( + ("values", "sum", pc.ScalarAggregateOptions(min_count=0)) + if func == "sum" + else ("values", func) + ) agg = pa.array( pa.Table.from_arrays( [pc.list_flatten(array), pc.list_parent_indices(array)], names=["values", "offsets"], ) .group_by("offsets") - .aggregate([("values", func)]) + .aggregate([aggregation]) .sort_by("offsets") .column(f"values_{func}") ) non_empty_mask = pa.array(pc.not_equal(pc.list_value_length(array), lit(0))) if func == "sum": - agg = agg.fill_null(lit(0)) # pyright:ignore[reportArgumentType] + # Make sure sum of empty list is 0. 
base_array = pc.if_else(non_empty_mask.is_null(), None, 0) else: - base_array = pc.if_else( - non_empty_mask, 0, None - ) # zero is just a placeholder which is replaced below + base_array = pa.repeat(lit_(None, type=agg.type), len(array)) return pa.chunked_array( [ pc.replace_with_mask( - base_array.cast(agg.type), + base_array, non_empty_mask.fill_null(False), # type: ignore[arg-type] agg, ) From 310daa65e1dfa6a252e108f8c441bfe8a9bdd6db Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Sat, 13 Dec 2025 14:11:09 +0000 Subject: [PATCH 33/34] simplify tests --- tests/expr_and_series/list/max_test.py | 30 +++++------------ tests/expr_and_series/list/mean_test.py | 30 +++++------------ tests/expr_and_series/list/median_test.py | 39 ++++++++--------------- tests/expr_and_series/list/min_test.py | 28 ++++------------ tests/expr_and_series/list/sum_test.py | 30 +++++------------ 5 files changed, 44 insertions(+), 113 deletions(-) diff --git a/tests/expr_and_series/list/max_test.py b/tests/expr_and_series/list/max_test.py index 6b79c75c5e..f3cd5db5a1 100644 --- a/tests/expr_and_series/list/max_test.py +++ b/tests/expr_and_series/list/max_test.py @@ -11,14 +11,10 @@ from tests.utils import Constructor, ConstructorEager data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]} +expected = [4, -1, None, None, None] -@pytest.mark.parametrize( - ("index", "expected"), [(0, 4), (1, -1), (2, None), (3, None), (4, None)] -) -def test_max_expr( - request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: int -) -> None: +def test_max_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: if any(backend in str(constructor) for backend in ("dask", "cudf", "sqlframe")): # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 request.applymarker(pytest.mark.xfail) @@ -26,24 +22,14 @@ def test_max_expr( if PANDAS_VERSION < (2, 2): pytest.skip() pytest.importorskip("pyarrow") - 
result = ( - nw.from_native(constructor(data)) - .select(nw.col("a").cast(nw.List(nw.Int32())).list.max()) - .lazy() - .collect()["a"] - .to_list() + result = nw.from_native(constructor(data)).select( + nw.col("a").cast(nw.List(nw.Int32())).list.max() ) - assert_equal_data({"a": [result[index]]}, {"a": [expected]}) + assert_equal_data(result, {"a": expected}) -@pytest.mark.parametrize( - ("index", "expected"), [(0, 4), (1, -1), (2, None), (3, None), (4, None)] -) def test_max_series( - request: pytest.FixtureRequest, - constructor_eager: ConstructorEager, - index: int, - expected: int, + request: pytest.FixtureRequest, constructor_eager: ConstructorEager ) -> None: if any(backend in str(constructor_eager) for backend in ("cudf",)): request.applymarker(pytest.mark.xfail) @@ -52,5 +38,5 @@ def test_max_series( pytest.skip() pytest.importorskip("pyarrow") df = nw.from_native(constructor_eager(data), eager_only=True) - result = df["a"].cast(nw.List(nw.Int32())).list.max().to_list() - assert_equal_data({"a": [result[index]]}, {"a": [expected]}) + result = df["a"].cast(nw.List(nw.Int32())).list.max() + assert_equal_data({"a": result}, {"a": expected}) diff --git a/tests/expr_and_series/list/mean_test.py b/tests/expr_and_series/list/mean_test.py index d2c42be932..9ff5984b2e 100644 --- a/tests/expr_and_series/list/mean_test.py +++ b/tests/expr_and_series/list/mean_test.py @@ -11,14 +11,10 @@ from tests.utils import Constructor, ConstructorEager data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]} +expected = [2.75, -1, None, None, None] -@pytest.mark.parametrize( - ("index", "expected"), [(0, 2.75), (1, -1), (2, None), (3, None), (4, None)] -) -def test_mean_expr( - request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: float -) -> None: +def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: if any(backend in str(constructor) for backend in ("dask", "cudf", "sqlframe")): # sqlframe issue: 
https://github.com/eakmanrq/sqlframe/issues/548 request.applymarker(pytest.mark.xfail) @@ -26,24 +22,14 @@ def test_mean_expr( if PANDAS_VERSION < (2, 2): pytest.skip() pytest.importorskip("pyarrow") - result = ( - nw.from_native(constructor(data)) - .select(nw.col("a").cast(nw.List(nw.Int32())).list.mean()) - .lazy() - .collect()["a"] - .to_list() + result = nw.from_native(constructor(data)).select( + nw.col("a").cast(nw.List(nw.Int32())).list.mean() ) - assert_equal_data({"a": [result[index]]}, {"a": [expected]}) + assert_equal_data(result, {"a": expected}) -@pytest.mark.parametrize( - ("index", "expected"), [(0, 2.75), (1, -1), (2, None), (3, None), (4, None)] -) def test_mean_series( - request: pytest.FixtureRequest, - constructor_eager: ConstructorEager, - index: int, - expected: float, + request: pytest.FixtureRequest, constructor_eager: ConstructorEager ) -> None: if any(backend in str(constructor_eager) for backend in ("cudf",)): request.applymarker(pytest.mark.xfail) @@ -52,5 +38,5 @@ def test_mean_series( pytest.skip() pytest.importorskip("pyarrow") df = nw.from_native(constructor_eager(data), eager_only=True) - result = df["a"].cast(nw.List(nw.Int32())).list.mean().to_list() - assert_equal_data({"a": [result[index]]}, {"a": [expected]}) + result = df["a"].cast(nw.List(nw.Int32())).list.mean() + assert_equal_data({"a": result}, {"a": expected}) diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py index 801312a673..b1baa242d7 100644 --- a/tests/expr_and_series/list/median_test.py +++ b/tests/expr_and_series/list/median_test.py @@ -13,14 +13,11 @@ from tests.utils import Constructor, ConstructorEager data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], [], [3, 4, None]]} +expected = [2.5, -1, None, None, None, 3.5] +expected_pyarrow = [2.5, -1, None, None, None, 3] -@pytest.mark.parametrize( - ("index", "expected"), [(0, 2.5), (1, -1), (2, None), (3, None), (4, None), (5, 3.5)] -) -def 
test_median_expr( - request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: float -) -> None: +def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: if any( backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "ibis") ) or ("polars" in str(constructor) and POLARS_VERSION < (0, 20, 7)): @@ -40,31 +37,21 @@ def test_median_expr( ): # pragma: no cover reason = "The issue only affects old Python versions on Windows." pytest.skip(reason=reason) - result = ( - nw.from_native(constructor(data)) - .select(nw.col("a").cast(nw.List(nw.Int32())).list.median()) - .lazy() - .collect()["a"] - .to_list() + result = nw.from_native(constructor(data)).select( + nw.col("a").cast(nw.List(nw.Int32())).list.median() ) if any( backend in str(constructor) for backend in ("pandas", "pyarrow", "pandas[pyarrow]") - ) and (index == 5): + ): # there is a mismatch as pyarrow uses an approximate median - assert_equal_data({"a": [result[index]]}, {"a": [3]}) + assert_equal_data(result, {"a": expected_pyarrow}) else: - assert_equal_data({"a": [result[index]]}, {"a": [expected]}) + assert_equal_data(result, {"a": expected}) -@pytest.mark.parametrize( - ("index", "expected"), [(0, 2.5), (1, -1), (2, None), (3, None), (4, None), (5, 3.5)] -) def test_median_series( - request: pytest.FixtureRequest, - constructor_eager: ConstructorEager, - index: int, - expected: float, + request: pytest.FixtureRequest, constructor_eager: ConstructorEager ) -> None: if any(backend in str(constructor_eager) for backend in ("cudf",)) or ( "polars" in str(constructor_eager) and POLARS_VERSION < (0, 20, 7) @@ -82,12 +69,12 @@ def test_median_series( reason = "The issue only affects old Python versions on Windows." 
pytest.skip(reason=reason) df = nw.from_native(constructor_eager(data), eager_only=True) - result = df["a"].cast(nw.List(nw.Int32())).list.median().to_list() + result = df["a"].cast(nw.List(nw.Int32())).list.median() if any( backend in str(constructor_eager) for backend in ("pandas", "pyarrow", "pandas[pyarrow]") - ) and (index == 5): + ): # there is a mismatch as pyarrow uses an approximate median - assert_equal_data({"a": [result[index]]}, {"a": [3]}) + assert_equal_data({"a": result}, {"a": expected_pyarrow}) else: - assert_equal_data({"a": [result[index]]}, {"a": [expected]}) + assert_equal_data({"a": result}, {"a": expected}) diff --git a/tests/expr_and_series/list/min_test.py b/tests/expr_and_series/list/min_test.py index 87d3fe1ca7..2039f7de56 100644 --- a/tests/expr_and_series/list/min_test.py +++ b/tests/expr_and_series/list/min_test.py @@ -11,38 +11,24 @@ from tests.utils import Constructor, ConstructorEager data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]} +expected = [2, -1, None, None, None] -@pytest.mark.parametrize( - ("index", "expected"), [(0, 2), (1, -1), (2, None), (3, None), (4, None)] -) -def test_min_expr( - request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: int -) -> None: +def test_min_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: if any(backend in str(constructor) for backend in ("dask", "cudf")): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor): if PANDAS_VERSION < (2, 2): pytest.skip() pytest.importorskip("pyarrow") - result = ( - nw.from_native(constructor(data)) - .select(nw.col("a").cast(nw.List(nw.Int32())).list.min()) - .lazy() - .collect()["a"] - .to_list() + result = nw.from_native(constructor(data)).select( + nw.col("a").cast(nw.List(nw.Int32())).list.min() ) - assert_equal_data({"a": [result[index]]}, {"a": [expected]}) + assert_equal_data(result, {"a": expected}) -@pytest.mark.parametrize( - ("index", "expected"), [(0, 2), (1, 
-1), (2, None), (3, None), (4, None)] -) def test_min_series( - request: pytest.FixtureRequest, - constructor_eager: ConstructorEager, - index: int, - expected: int, + request: pytest.FixtureRequest, constructor_eager: ConstructorEager ) -> None: if any(backend in str(constructor_eager) for backend in ("cudf",)): request.applymarker(pytest.mark.xfail) @@ -52,4 +38,4 @@ def test_min_series( pytest.importorskip("pyarrow") df = nw.from_native(constructor_eager(data), eager_only=True) result = df["a"].cast(nw.List(nw.Int32())).list.min().to_list() - assert_equal_data({"a": [result[index]]}, {"a": [expected]}) + assert_equal_data({"a": result}, {"a": expected}) diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py index 90b78bac58..1f0ff7729e 100644 --- a/tests/expr_and_series/list/sum_test.py +++ b/tests/expr_and_series/list/sum_test.py @@ -11,14 +11,10 @@ from tests.utils import Constructor, ConstructorEager data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]} +expected = [11, -1, None, 0, 0] -@pytest.mark.parametrize( - ("index", "expected"), [(0, 11), (1, -1), (2, None), (3, 0), (4, 0)] -) -def test_sum_expr( - request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: int -) -> None: +def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: if any(backend in str(constructor) for backend in ("dask", "cudf", "sqlframe")): # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548 request.applymarker(pytest.mark.xfail) @@ -29,24 +25,14 @@ def test_sum_expr( if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 2): reason = "version too old, duckdb 1.2 required for LambdaExpression." 
pytest.skip(reason=reason) - result = ( - nw.from_native(constructor(data)) - .select(nw.col("a").cast(nw.List(nw.Int32())).list.sum()) - .lazy() - .collect()["a"] - .to_list() + result = nw.from_native(constructor(data)).select( + nw.col("a").cast(nw.List(nw.Int32())).list.sum() ) - assert_equal_data({"a": [result[index]]}, {"a": [expected]}) + assert_equal_data(result, {"a": expected}) -@pytest.mark.parametrize( - ("index", "expected"), [(0, 11), (1, -1), (2, None), (3, 0), (4, 0)] -) def test_sum_series( - request: pytest.FixtureRequest, - constructor_eager: ConstructorEager, - index: int, - expected: int, + request: pytest.FixtureRequest, constructor_eager: ConstructorEager ) -> None: if any(backend in str(constructor_eager) for backend in ("cudf",)): request.applymarker(pytest.mark.xfail) @@ -55,5 +41,5 @@ def test_sum_series( pytest.skip() pytest.importorskip("pyarrow") df = nw.from_native(constructor_eager(data), eager_only=True) - result = df["a"].cast(nw.List(nw.Int32())).list.sum().to_list() - assert_equal_data({"a": [result[index]]}, {"a": [expected]}) + result = df["a"].cast(nw.List(nw.Int32())).list.sum() + assert_equal_data({"a": result}, {"a": expected}) From 398c3509ba7e64b146a696c0b891f756d6394355 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Sat, 13 Dec 2025 14:22:03 +0000 Subject: [PATCH 34/34] fix typing --- narwhals/_arrow/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index c46d400658..dbd8aa6c62 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -503,7 +503,7 @@ def list_agg( ) -> ChunkedArrayAny: lit_: Incomplete = lit aggregation = ( - ("values", "sum", pc.ScalarAggregateOptions(min_count=0)) + ("values", func, pc.ScalarAggregateOptions(min_count=0)) if func == "sum" else ("values", func) )