From fadddc3da260c19eaa299593c63eab204315a2fc Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Fri, 28 Nov 2025 10:52:00 +0000
Subject: [PATCH 01/34] feat: add list aggregate methods

---
 docs/api-reference/expr_list.md           |   5 +
 docs/api-reference/series_list.md         |   5 +
 narwhals/_arrow/series_list.py            |  17 ++-
 narwhals/_arrow/utils.py                  |  16 +++
 narwhals/_compliant/any_namespace.py      |   5 +
 narwhals/_compliant/expr.py               |  15 +++
 narwhals/_duckdb/expr_list.py             |  15 +++
 narwhals/_ibis/expr_list.py               |  15 +++
 narwhals/_pandas_like/series_list.py      |  40 +++++++
 narwhals/_spark_like/expr_list.py         |  19 ++++
 narwhals/expr_list.py                     | 125 ++++++++++++++++++++++
 narwhals/series_list.py                   | 100 +++++++++++++++++
 tests/expr_and_series/list/max_test.py    |  40 +++++++
 tests/expr_and_series/list/mean_test.py   |  39 +++++++
 tests/expr_and_series/list/median_test.py |  40 +++++++
 tests/expr_and_series/list/min_test.py    |  37 +++++++
 tests/expr_and_series/list/sum_test.py    |  40 +++++++
 17 files changed, 572 insertions(+), 1 deletion(-)
 create mode 100644 tests/expr_and_series/list/max_test.py
 create mode 100644 tests/expr_and_series/list/mean_test.py
 create mode 100644 tests/expr_and_series/list/median_test.py
 create mode 100644 tests/expr_and_series/list/min_test.py
 create mode 100644 tests/expr_and_series/list/sum_test.py

diff --git a/docs/api-reference/expr_list.md b/docs/api-reference/expr_list.md
index 84fb831c50..f44a25d751 100644
--- a/docs/api-reference/expr_list.md
+++ b/docs/api-reference/expr_list.md
@@ -7,6 +7,11 @@
         - contains
         - get
         - len
+        - max
+        - mean
+        - median
+        - min
+        - sum
         - unique
       show_source: false
       show_bases: false
diff --git a/docs/api-reference/series_list.md b/docs/api-reference/series_list.md
index 7590732dee..39adbad185 100644
--- a/docs/api-reference/series_list.md
+++ b/docs/api-reference/series_list.md
@@ -7,6 +7,11 @@
         - contains
         - get
         - len
+        - max
+        - mean
+        - median
+        - min
+        - sum
         - unique
       show_source: false
       show_bases: false
diff --git a/narwhals/_arrow/series_list.py b/narwhals/_arrow/series_list.py
index defad3dad6..25e598aedd 100644
--- a/narwhals/_arrow/series_list.py
+++ b/narwhals/_arrow/series_list.py
@@ -5,7 +5,7 @@
 import pyarrow as pa
 import pyarrow.compute as pc
 
-from narwhals._arrow.utils import ArrowSeriesNamespace
+from narwhals._arrow.utils import ArrowSeriesNamespace, list_agg
 from narwhals._compliant.any_namespace import ListNamespace
 from narwhals._utils import not_implemented
 
@@ -20,5 +20,20 @@ def len(self) -> ArrowSeries:
     def get(self, index: int) -> ArrowSeries:
         return self.with_native(pc.list_element(self.native, index))
 
+    def min(self) -> ArrowSeries:
+        return self.with_native(list_agg(self.native, "min"))
+
+    def max(self) -> ArrowSeries:
+        return self.with_native(list_agg(self.native, "max"))
+
+    def mean(self) -> ArrowSeries:
+        return self.with_native(list_agg(self.native, "mean"))
+
+    def median(self) -> ArrowSeries:
+        return self.with_native(list_agg(self.native, "approximate_median"))
+
+    def sum(self) -> ArrowSeries:
+        return self.with_native(list_agg(self.native, "sum"))
+
     unique = not_implemented()
     contains = not_implemented()
diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py
index 46b5985e1d..6ca76eb239 100644
--- a/narwhals/_arrow/utils.py
+++ b/narwhals/_arrow/utils.py
@@ -11,6 +11,7 @@
 
 if TYPE_CHECKING:
     from collections.abc import Iterable, Iterator, Mapping
+    from typing import Literal
 
     from typing_extensions import TypeAlias, TypeIs
 
@@ -494,3 +495,18 @@ def arange(start: int, end: int, step: int) -> ArrayAny:
         return pa.array(np.arange(start, end, step))
     # NOTE: Added in https://github.com/apache/arrow/pull/46778
     return pa.arange(start, end, step)  # type: ignore[attr-defined]
+
+
+def list_agg(
+    array: ChunkedArrayAny,
+    func: Literal["min", "max", "mean", "approximate_median", "sum"],
+) -> ChunkedArrayAny:
+    """Aggregate the values of each list in `array` using `func`."""
+    # NOTE(review): empty/null lists emit no parent index, so they produce no
+    # output row and the result can be shorter than `array` - confirm with callers.
+    flat = [pc.list_flatten(array), pc.list_parent_indices(array)]
+    tbl = pa.Table.from_arrays(flat, names=["values", "offsets"])
+    # Threaded `group_by` guarantees no stable output order, and the result must
+    # stay aligned with the input rows, so force single-threaded grouping.
+    agg = tbl.group_by("offsets", use_threads=False).aggregate([("values", func)])
+    return agg.column(f"values_{func}")
diff --git a/narwhals/_compliant/any_namespace.py b/narwhals/_compliant/any_namespace.py
index b7e48a273f..cd76470d89 100644
--- a/narwhals/_compliant/any_namespace.py
+++ b/narwhals/_compliant/any_namespace.py
@@ -70,6 +70,11 @@ def get(self, index: int) -> CompliantT_co: ...
     def len(self) -> CompliantT_co: ...
     def unique(self) -> CompliantT_co: ...
     def contains(self, item: NonNestedLiteral) -> CompliantT_co: ...
+    def min(self) -> CompliantT_co: ...
+    def max(self) -> CompliantT_co: ...
+    def mean(self) -> CompliantT_co: ...
+    def median(self) -> CompliantT_co: ...
+    def sum(self) -> CompliantT_co: ...
 
 
 class NameNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]):
diff --git a/narwhals/_compliant/expr.py b/narwhals/_compliant/expr.py
index cc0d6cc3f8..6ff9417960 100644
--- a/narwhals/_compliant/expr.py
+++ b/narwhals/_compliant/expr.py
@@ -993,6 +993,21 @@ def contains(self, item: NonNestedLiteral) -> EagerExprT:
     def get(self, index: int) -> EagerExprT:
         return self.compliant._reuse_series_namespace("list", "get", index=index)
 
+    def min(self) -> EagerExprT:
+        return self.compliant._reuse_series_namespace("list", "min")
+
+    def max(self) -> EagerExprT:
+        return self.compliant._reuse_series_namespace("list", "max")
+
+    def mean(self) -> EagerExprT:
+        return self.compliant._reuse_series_namespace("list", "mean")
+
+    def median(self) -> EagerExprT:
+        return self.compliant._reuse_series_namespace("list", "median")
+
+    def sum(self) -> EagerExprT:
+        return self.compliant._reuse_series_namespace("list", "sum")
+
 
 class CompliantExprNameNamespace(  # type: ignore[misc]
     _ExprNamespace[CompliantExprT_co],
diff --git a/narwhals/_duckdb/expr_list.py b/narwhals/_duckdb/expr_list.py
index b726f2fc78..184f57252f 100644
--- a/narwhals/_duckdb/expr_list.py
+++ b/narwhals/_duckdb/expr_list.py
@@ -40,3 +40,18 @@ def get(self, index: int) -> DuckDBExpr:
         return self.compliant._with_elementwise(
             lambda expr: F("list_extract", expr, lit(index + 1))
         )
+
+    def min(self) -> DuckDBExpr:
+        return self.compliant._with_elementwise(lambda expr: F("list_min", expr))
+
+    def max(self) -> DuckDBExpr:
+        return self.compliant._with_elementwise(lambda expr: F("list_max", expr))
+
+    def mean(self) -> DuckDBExpr:
+        return self.compliant._with_elementwise(lambda expr: F("list_avg", expr))
+
+    def median(self) -> DuckDBExpr:
+        return self.compliant._with_elementwise(lambda expr: F("list_median", expr))
+
+    def sum(self) -> DuckDBExpr:
+        return self.compliant._with_elementwise(lambda expr: F("list_sum", expr))
diff --git a/narwhals/_ibis/expr_list.py b/narwhals/_ibis/expr_list.py
index 8070769308..ff0cd76c08 100644
--- a/narwhals/_ibis/expr_list.py
+++ b/narwhals/_ibis/expr_list.py
@@ -4,6 +4,7 @@
 
 from narwhals._compliant import LazyExprNamespace
 from narwhals._compliant.any_namespace import ListNamespace
+from narwhals._utils import not_implemented
 
 if TYPE_CHECKING:
     import ibis.expr.types as ir
@@ -27,3 +28,17 @@ def _get(expr: ir.ArrayColumn) -> ir.Column:
             return expr[index]
 
         return self.compliant._with_callable(_get)
+
+    def min(self) -> IbisExpr:
+        return self.compliant._with_callable(lambda expr: expr.mins())
+
+    def max(self) -> IbisExpr:
+        return self.compliant._with_callable(lambda expr: expr.maxs())
+
+    def mean(self) -> IbisExpr:
+        return self.compliant._with_callable(lambda expr: expr.means())
+
+    def sum(self) -> IbisExpr:
+        return self.compliant._with_callable(lambda expr: expr.sums())
+
+    median = not_implemented()
diff --git a/narwhals/_pandas_like/series_list.py b/narwhals/_pandas_like/series_list.py
index 2d087493df..04f424e14d 100644
--- a/narwhals/_pandas_like/series_list.py
+++ b/narwhals/_pandas_like/series_list.py
@@ -11,6 +11,8 @@
 from narwhals._utils import not_implemented
 
 if TYPE_CHECKING:
+    from typing import Literal
+
     from narwhals._pandas_like.series import PandasLikeSeries
 
 
@@ -40,3 +42,41 @@ def get(self, index: int) -> PandasLikeSeries:
         result = self.native.list[index]
         result.name = self.native.name
         return self.with_native(result)
+
+    def _agg(
+        self, func: Literal["min", "max", "mean", "approximate_median", "sum"]
+    ) -> PandasLikeSeries:
+        dtype_backend = get_dtype_backend(
+            self.native.dtype, self.compliant._implementation
+        )
+        if dtype_backend != "pyarrow":
+            msg = "Only pyarrow backend is currently supported."
+            raise NotImplementedError(msg)
+
+        from narwhals._arrow.utils import list_agg, native_to_narwhals_dtype
+
+        ca = self.native.array._pa_array
+        result_arr = list_agg(ca, func)
+        nw_dtype = native_to_narwhals_dtype(result_arr.type, self.version)
+        out_dtype = narwhals_to_native_dtype(
+            nw_dtype, "pyarrow", self.implementation, self.version
+        )
+        result_native = type(self.native)(
+            result_arr, dtype=out_dtype, index=self.native.index, name=self.native.name
+        )
+        return self.with_native(result_native)
+
+    def min(self) -> PandasLikeSeries:
+        return self._agg("min")
+
+    def max(self) -> PandasLikeSeries:
+        return self._agg("max")
+
+    def mean(self) -> PandasLikeSeries:
+        return self._agg("mean")
+
+    def median(self) -> PandasLikeSeries:
+        return self._agg("approximate_median")
+
+    def sum(self) -> PandasLikeSeries:
+        return self._agg("sum")
diff --git a/narwhals/_spark_like/expr_list.py b/narwhals/_spark_like/expr_list.py
index 31be5f5bb9..f8ee83dfe5 100644
--- a/narwhals/_spark_like/expr_list.py
+++ b/narwhals/_spark_like/expr_list.py
@@ -4,6 +4,7 @@
 
 from narwhals._compliant import LazyExprNamespace
 from narwhals._compliant.any_namespace import ListNamespace
+from narwhals._utils import not_implemented
 
 if TYPE_CHECKING:
     from sqlframe.base.column import Column
@@ -33,3 +34,21 @@ def _get(expr: Column) -> Column:
             return expr.getItem(index)
 
         return self.compliant._with_elementwise(_get)
+
+    def min(self) -> SparkLikeExpr:
+        def func(expr: Column) -> Column:
+            F = self.compliant._F
+            return F.array_min(expr)
+
+        return self.compliant._with_elementwise(func)
+
+    def max(self) -> SparkLikeExpr:
+        def func(expr: Column) -> Column:
+            F = self.compliant._F
+            return F.array_max(F.array_compact(expr))
+
+        return self.compliant._with_elementwise(func)
+
+    mean = not_implemented()
+    median = not_implemented()
+    sum = not_implemented()
diff --git a/narwhals/expr_list.py b/narwhals/expr_list.py
index 8f9c94c6ab..94e4c4f38d 100644
--- a/narwhals/expr_list.py
+++ b/narwhals/expr_list.py
@@ -143,3 +143,128 @@ def get(self, index: int) -> ExprT:
         return self._expr._append_node(
             ExprNode(ExprKind.ELEMENTWISE, "list.get", index=index)
         )
+
+    def min(self) -> ExprT:
+        """Compute the minimum value of each list in the column.
+
+        Examples:
+            >>> import polars as pl
+            >>> import narwhals as nw
+            >>> df_native = pl.DataFrame({"a": [[1], [3, 4, None]]})
+            >>> df = nw.from_native(df_native)
+            >>> df.with_columns(a_min=nw.col("a").list.min())
+            ┌────────────────────────┐
+            |   Narwhals DataFrame   |
+            |------------------------|
+            |shape: (2, 2)           |
+            |┌──────────────┬───────┐|
+            |│ a            ┆ a_min │|
+            |│ ---          ┆ ---   │|
+            |│ list[i64]    ┆ i64   │|
+            |╞══════════════╪═══════╡|
+            |│ [1]          ┆ 1     │|
+            |│ [3, 4, null] ┆ 3     │|
+            |└──────────────┴───────┘|
+            └────────────────────────┘
+        """
+        return self._expr._append_node(ExprNode(ExprKind.ELEMENTWISE, "list.min"))
+
+    def max(self) -> ExprT:
+        """Compute the maximum value of each list in the column.
+
+        Examples:
+            >>> import polars as pl
+            >>> import narwhals as nw
+            >>> df_native = pl.DataFrame({"a": [[1], [3, 4, None]]})
+            >>> df = nw.from_native(df_native)
+            >>> df.with_columns(a_max=nw.col("a").list.max())
+            ┌────────────────────────┐
+            |   Narwhals DataFrame   |
+            |------------------------|
+            |shape: (2, 2)           |
+            |┌──────────────┬───────┐|
+            |│ a            ┆ a_max │|
+            |│ ---          ┆ ---   │|
+            |│ list[i64]    ┆ i64   │|
+            |╞══════════════╪═══════╡|
+            |│ [1]          ┆ 1     │|
+            |│ [3, 4, null] ┆ 4     │|
+            |└──────────────┴───────┘|
+            └────────────────────────┘
+        """
+        return self._expr._append_node(ExprNode(ExprKind.ELEMENTWISE, "list.max"))
+
+    def mean(self) -> ExprT:
+        """Compute the mean of the values in each list in the column.
+
+        Examples:
+            >>> import polars as pl
+            >>> import narwhals as nw
+            >>> df_native = pl.DataFrame({"a": [[1], [3, 4, None]]})
+            >>> df = nw.from_native(df_native)
+            >>> df.with_columns(a_mean=nw.col("a").list.mean())
+            ┌─────────────────────────┐
+            |   Narwhals DataFrame    |
+            |-------------------------|
+            |shape: (2, 2)            |
+            |┌──────────────┬────────┐|
+            |│ a            ┆ a_mean │|
+            |│ ---          ┆ ---    │|
+            |│ list[i64]    ┆ f64    │|
+            |╞══════════════╪════════╡|
+            |│ [1]          ┆ 1.0    │|
+            |│ [3, 4, null] ┆ 3.5    │|
+            |└──────────────┴────────┘|
+            └─────────────────────────┘
+        """
+        return self._expr._append_node(ExprNode(ExprKind.ELEMENTWISE, "list.mean"))
+
+    def median(self) -> ExprT:
+        """Compute the median of the values in each list in the column.
+
+        Examples:
+            >>> import polars as pl
+            >>> import narwhals as nw
+            >>> df_native = pl.DataFrame({"a": [[1], [3, 4, None]]})
+            >>> df = nw.from_native(df_native)
+            >>> df.with_columns(a_median=nw.col("a").list.median())
+            ┌───────────────────────────┐
+            |    Narwhals DataFrame     |
+            |---------------------------|
+            |shape: (2, 2)              |
+            |┌──────────────┬──────────┐|
+            |│ a            ┆ a_median │|
+            |│ ---          ┆ ---      │|
+            |│ list[i64]    ┆ f64      │|
+            |╞══════════════╪══════════╡|
+            |│ [1]          ┆ 1.0      │|
+            |│ [3, 4, null] ┆ 3.5      │|
+            |└──────────────┴──────────┘|
+            └───────────────────────────┘
+        """
+        return self._expr._append_node(ExprNode(ExprKind.ELEMENTWISE, "list.median"))
+
+    def sum(self) -> ExprT:
+        """Compute the sum of the values in each list in the column.
+
+        Examples:
+            >>> import polars as pl
+            >>> import narwhals as nw
+            >>> df_native = pl.DataFrame({"a": [[1], [3, 4, None]]})
+            >>> df = nw.from_native(df_native)
+            >>> df.with_columns(a_sum=nw.col("a").list.sum())
+            ┌────────────────────────┐
+            |   Narwhals DataFrame   |
+            |------------------------|
+            |shape: (2, 2)           |
+            |┌──────────────┬───────┐|
+            |│ a            ┆ a_sum │|
+            |│ ---          ┆ ---   │|
+            |│ list[i64]    ┆ i64   │|
+            |╞══════════════╪═══════╡|
+            |│ [1]          ┆ 1     │|
+            |│ [3, 4, null] ┆ 7     │|
+            |└──────────────┴───────┘|
+            └────────────────────────┘
+        """
+        return self._expr._append_node(ExprNode(ExprKind.ELEMENTWISE, "list.sum"))
diff --git a/narwhals/series_list.py b/narwhals/series_list.py
index baa7ed8c8e..a771ff1c59 100644
--- a/narwhals/series_list.py
+++ b/narwhals/series_list.py
@@ -117,3 +117,103 @@ def get(self, index: int) -> SeriesT:
         return self._narwhals_series._with_compliant(
             self._narwhals_series._compliant_series.list.get(index)
         )
+
+    def min(self) -> SeriesT:
+        """Compute the minimum value of each list in the Series.
+
+        Examples:
+            >>> import polars as pl
+            >>> import narwhals as nw
+            >>> s_native = pl.Series([[1], [3, 4, None]])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.list.min().to_native()  # doctest: +NORMALIZE_WHITESPACE
+            shape: (2,)
+            Series: '' [i64]
+            [
+                    1
+                    3
+            ]
+        """
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.list.min()
+        )
+
+    def max(self) -> SeriesT:
+        """Compute the maximum value of each list in the Series.
+
+        Examples:
+            >>> import polars as pl
+            >>> import narwhals as nw
+            >>> s_native = pl.Series([[1], [3, 4, None]])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.list.max().to_native()  # doctest: +NORMALIZE_WHITESPACE
+            shape: (2,)
+            Series: '' [i64]
+            [
+                    1
+                    4
+            ]
+        """
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.list.max()
+        )
+
+    def mean(self) -> SeriesT:
+        """Compute the mean of the values in each list in the Series.
+
+        Examples:
+            >>> import polars as pl
+            >>> import narwhals as nw
+            >>> s_native = pl.Series([[1], [3, 4, None]])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.list.mean().to_native()  # doctest: +NORMALIZE_WHITESPACE
+            shape: (2,)
+            Series: '' [f64]
+            [
+                    1.0
+                    3.5
+            ]
+        """
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.list.mean()
+        )
+
+    def median(self) -> SeriesT:
+        """Compute the median of the values in each list in the Series.
+
+        Examples:
+            >>> import polars as pl
+            >>> import narwhals as nw
+            >>> s_native = pl.Series([[1], [3, 4, None]])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.list.median().to_native()  # doctest: +NORMALIZE_WHITESPACE
+            shape: (2,)
+            Series: '' [f64]
+            [
+                    1.0
+                    3.5
+            ]
+        """
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.list.median()
+        )
+
+    def sum(self) -> SeriesT:
+        """Compute the sum of the values in each list in the Series.
+
+        Examples:
+            >>> import polars as pl
+            >>> import narwhals as nw
+            >>> s_native = pl.Series([[1], [3, 4, None]])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.list.sum().to_native()  # doctest: +NORMALIZE_WHITESPACE
+            shape: (2,)
+            Series: '' [i64]
+            [
+                    1
+                    7
+            ]
+        """
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.list.sum()
+        )
diff --git a/tests/expr_and_series/list/max_test.py b/tests/expr_and_series/list/max_test.py
new file mode 100644
index 0000000000..152fa77719
--- /dev/null
+++ b/tests/expr_and_series/list/max_test.py
@@ -0,0 +1,40 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+
+import narwhals as nw
+
+if TYPE_CHECKING:
+    from tests.utils import Constructor, ConstructorEager
+
+data = {"a": [[3, 2, 2, 4, None], [-1]]}
+
+
+def test_max_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
+    if any(
+        backend in str(constructor) for backend in ("dask", "modin", "cudf", "sqlframe")
+    ):
+        # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
+        request.applymarker(pytest.mark.xfail)
+    result = (
+        nw.from_native(constructor(data))
+        .select(nw.col("a").cast(nw.List(nw.Int32())).list.max())
+        .lazy()
+        .collect()["a"]
+        .to_list()
+    )
+    assert result[0] == 4
+    assert result[1] == -1
+
+
+def test_max_series(
+    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
+) -> None:
+    if any(backend in str(constructor_eager) for backend in ("modin", "cudf")):
+        request.applymarker(pytest.mark.xfail)
+    df = nw.from_native(constructor_eager(data), eager_only=True)
+    result = df["a"].cast(nw.List(nw.Int32())).list.max().to_list()
+    assert result[0] == 4
+    assert result[1] == -1
diff --git a/tests/expr_and_series/list/mean_test.py b/tests/expr_and_series/list/mean_test.py
new file mode 100644
index 0000000000..aa657a3574
--- /dev/null
+++ b/tests/expr_and_series/list/mean_test.py
@@ -0,0 +1,39 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+
+import narwhals as nw
+
+if TYPE_CHECKING:
+    from tests.utils import Constructor, ConstructorEager
+
+data = {"a": [[3, 2, 2, 4, None], [-1]]}
+
+
+def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
+    if any(
+        backend in str(constructor) for backend in ("dask", "modin", "cudf", "sqlframe")
+    ):
+        request.applymarker(pytest.mark.xfail)
+    result = (
+        nw.from_native(constructor(data))
+        .select(nw.col("a").cast(nw.List(nw.Int32())).list.mean())
+        .lazy()
+        .collect()["a"]
+        .to_list()
+    )
+    assert result[0] == 2.75
+    assert result[1] == -1
+
+
+def test_mean_series(
+    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
+) -> None:
+    if any(backend in str(constructor_eager) for backend in ("modin", "cudf")):
+        request.applymarker(pytest.mark.xfail)
+    df = nw.from_native(constructor_eager(data), eager_only=True)
+    result = df["a"].cast(nw.List(nw.Int32())).list.mean().to_list()
+    assert result[0] == 2.75
+    assert result[1] == -1
diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py
new file mode 100644
index 0000000000..cc253b53f1
--- /dev/null
+++ b/tests/expr_and_series/list/median_test.py
@@ -0,0 +1,40 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+
+import narwhals as nw
+
+if TYPE_CHECKING:
+    from tests.utils import Constructor, ConstructorEager
+
+data = {"a": [[3, 2, 2, 4, None], [-1]]}
+
+
+def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
+    if any(
+        backend in str(constructor)
+        for backend in ("dask", "modin", "cudf", "sqlframe", "ibis")
+    ):
+        request.applymarker(pytest.mark.xfail)
+    result = (
+        nw.from_native(constructor(data))
+        .select(nw.col("a").cast(nw.List(nw.Int32())).list.median())
+        .lazy()
+        .collect()["a"]
+        .to_list()
+    )
+    assert result[0] == 2.5
+    assert result[1] == -1
+
+
+def test_median_series(
+    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
+) -> None:
+    if any(backend in str(constructor_eager) for backend in ("modin", "cudf")):
+        request.applymarker(pytest.mark.xfail)
+    df = nw.from_native(constructor_eager(data), eager_only=True)
+    result = df["a"].cast(nw.List(nw.Int32())).list.median().to_list()
+    assert result[0] == 2.5
+    assert result[1] == -1
diff --git a/tests/expr_and_series/list/min_test.py b/tests/expr_and_series/list/min_test.py
new file mode 100644
index 0000000000..fc0df66805
--- /dev/null
+++ b/tests/expr_and_series/list/min_test.py
@@ -0,0 +1,37 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+
+import narwhals as nw
+
+if TYPE_CHECKING:
+    from tests.utils import Constructor, ConstructorEager
+
+data = {"a": [[3, 2, 2, 4, None], [-1]]}
+
+
+def test_min_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
+    if any(backend in str(constructor) for backend in ("dask", "modin", "cudf")):
+        request.applymarker(pytest.mark.xfail)
+    result = (
+        nw.from_native(constructor(data))
+        .select(nw.col("a").cast(nw.List(nw.Int32())).list.min())
+        .lazy()
+        .collect()["a"]
+        .to_list()
+    )
+    assert result[0] == 2
+    assert result[1] == -1
+
+
+def test_min_series(
+    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
+) -> None:
+    if any(backend in str(constructor_eager) for backend in ("modin", "cudf")):
+        request.applymarker(pytest.mark.xfail)
+    df = nw.from_native(constructor_eager(data), eager_only=True)
+    result = df["a"].cast(nw.List(nw.Int32())).list.min().to_list()
+    assert result[0] == 2
+    assert result[1] == -1
diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py
new file mode 100644
index 0000000000..d1f82ce622
--- /dev/null
+++ b/tests/expr_and_series/list/sum_test.py
@@ -0,0 +1,40 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+
+import narwhals as nw
+
+if TYPE_CHECKING:
+    from tests.utils import Constructor, ConstructorEager
+
+data = {"a": [[3, 2, 2, 4, None], [-1]]}
+
+
+def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
+    if any(
+        backend in str(constructor) for backend in ("dask", "modin", "cudf", "sqlframe")
+    ):
+        # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
+        request.applymarker(pytest.mark.xfail)
+    result = (
+        nw.from_native(constructor(data))
+        .select(nw.col("a").cast(nw.List(nw.Int32())).list.sum())
+        .lazy()
+        .collect()["a"]
+        .to_list()
+    )
+    assert result[0] == 11
+    assert result[1] == -1
+
+
+def test_sum_series(
+    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
+) -> None:
+    if any(backend in str(constructor_eager) for backend in ("modin", "cudf")):
+        request.applymarker(pytest.mark.xfail)
+    df = nw.from_native(constructor_eager(data), eager_only=True)
+    result = df["a"].cast(nw.List(nw.Int32())).list.sum().to_list()
+    assert result[0] == 11
+    assert result[1] == -1

From 040527b45b98b2e5918f622ac4dd2d0c34a544f0 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Fri, 28 Nov 2025 11:36:23 +0000
Subject: [PATCH 02/34] skip old pandas and skip if no pyarrow

---
 tests/expr_and_series/list/max_test.py    | 9 +++++++++
 tests/expr_and_series/list/mean_test.py   | 9 +++++++++
 tests/expr_and_series/list/median_test.py | 9 +++++++++
 tests/expr_and_series/list/min_test.py    | 9 +++++++++
 tests/expr_and_series/list/sum_test.py    | 9 +++++++++
 5 files changed, 45 insertions(+)

diff --git a/tests/expr_and_series/list/max_test.py b/tests/expr_and_series/list/max_test.py
index 152fa77719..7b9fe5ac4a 100644
--- a/tests/expr_and_series/list/max_test.py
+++ b/tests/expr_and_series/list/max_test.py
@@ -5,6 +5,7 @@
 import pytest
 
 import narwhals as nw
+from tests.utils import PANDAS_VERSION
 
 if TYPE_CHECKING:
     from tests.utils import Constructor, ConstructorEager
@@ -18,6 +19,10 @@ def test_max_expr(request: pytest.FixtureRequest, constructor: Constructor) -> N
     ):
         # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
         request.applymarker(pytest.mark.xfail)
+    if "pandas" in str(constructor):
+        if PANDAS_VERSION < (2, 2):
+            pytest.skip()
+        pytest.importorskip("pyarrow")
     result = (
         nw.from_native(constructor(data))
         .select(nw.col("a").cast(nw.List(nw.Int32())).list.max())
@@ -34,6 +39,10 @@ def test_max_series(
 ) -> None:
     if any(backend in str(constructor_eager) for backend in ("modin", "cudf")):
         request.applymarker(pytest.mark.xfail)
+    if "pandas" in str(constructor_eager):
+        if PANDAS_VERSION < (2, 2):
+            pytest.skip()
+        pytest.importorskip("pyarrow")
     df = nw.from_native(constructor_eager(data), eager_only=True)
     result = df["a"].cast(nw.List(nw.Int32())).list.max().to_list()
     assert result[0] == 4
diff --git a/tests/expr_and_series/list/mean_test.py b/tests/expr_and_series/list/mean_test.py
index aa657a3574..4f05090b81 100644
--- a/tests/expr_and_series/list/mean_test.py
+++ b/tests/expr_and_series/list/mean_test.py
@@ -5,6 +5,7 @@
 import pytest
 
 import narwhals as nw
+from tests.utils import PANDAS_VERSION
 
 if TYPE_CHECKING:
     from tests.utils import Constructor, ConstructorEager
@@ -17,6 +18,10 @@ def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) ->
         backend in str(constructor) for backend in ("dask", "modin", "cudf", "sqlframe")
     ):
         request.applymarker(pytest.mark.xfail)
+    if "pandas" in str(constructor):
+        if PANDAS_VERSION < (2, 2):
+            pytest.skip()
+        pytest.importorskip("pyarrow")
     result = (
         nw.from_native(constructor(data))
         .select(nw.col("a").cast(nw.List(nw.Int32())).list.mean())
@@ -33,6 +38,10 @@ def test_mean_series(
 ) -> None:
     if any(backend in str(constructor_eager) for backend in ("modin", "cudf")):
         request.applymarker(pytest.mark.xfail)
+    if "pandas" in str(constructor_eager):
+        if PANDAS_VERSION < (2, 2):
+            pytest.skip()
+        pytest.importorskip("pyarrow")
     df = nw.from_native(constructor_eager(data), eager_only=True)
     result = df["a"].cast(nw.List(nw.Int32())).list.mean().to_list()
     assert result[0] == 2.75
diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py
index cc253b53f1..760f817ba7 100644
--- a/tests/expr_and_series/list/median_test.py
+++ b/tests/expr_and_series/list/median_test.py
@@ -5,6 +5,7 @@
 import pytest
 
 import narwhals as nw
+from tests.utils import PANDAS_VERSION
 
 if TYPE_CHECKING:
     from tests.utils import Constructor, ConstructorEager
@@ -18,6 +19,10 @@ def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -
         for backend in ("dask", "modin", "cudf", "sqlframe", "ibis")
     ):
         request.applymarker(pytest.mark.xfail)
+    if "pandas" in str(constructor):
+        if PANDAS_VERSION < (2, 2):
+            pytest.skip()
+        pytest.importorskip("pyarrow")
     result = (
         nw.from_native(constructor(data))
         .select(nw.col("a").cast(nw.List(nw.Int32())).list.median())
@@ -34,6 +39,10 @@ def test_median_series(
 ) -> None:
     if any(backend in str(constructor_eager) for backend in ("modin", "cudf")):
         request.applymarker(pytest.mark.xfail)
+    if "pandas" in str(constructor_eager):
+        if PANDAS_VERSION < (2, 2):
+            pytest.skip()
+        pytest.importorskip("pyarrow")
     df = nw.from_native(constructor_eager(data), eager_only=True)
     result = df["a"].cast(nw.List(nw.Int32())).list.median().to_list()
     assert result[0] == 2.5
diff --git a/tests/expr_and_series/list/min_test.py b/tests/expr_and_series/list/min_test.py
index fc0df66805..ab98f066b6 100644
--- a/tests/expr_and_series/list/min_test.py
+++ b/tests/expr_and_series/list/min_test.py
@@ -5,6 +5,7 @@
 import pytest
 
 import narwhals as nw
+from tests.utils import PANDAS_VERSION
 
 if TYPE_CHECKING:
     from tests.utils import Constructor, ConstructorEager
@@ -15,6 +16,10 @@
 def test_min_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
     if any(backend in str(constructor) for backend in ("dask", "modin", "cudf")):
         request.applymarker(pytest.mark.xfail)
+    if "pandas" in str(constructor):
+        if PANDAS_VERSION < (2, 2):
+            pytest.skip()
+        pytest.importorskip("pyarrow")
     result = (
         nw.from_native(constructor(data))
         .select(nw.col("a").cast(nw.List(nw.Int32())).list.min())
@@ -31,6 +36,10 @@ def test_min_series(
 ) -> None:
     if any(backend in str(constructor_eager) for backend in ("modin", "cudf")):
         request.applymarker(pytest.mark.xfail)
+    if "pandas" in str(constructor_eager):
+        if PANDAS_VERSION < (2, 2):
+            pytest.skip()
+        pytest.importorskip("pyarrow")
     df = nw.from_native(constructor_eager(data), eager_only=True)
     result = df["a"].cast(nw.List(nw.Int32())).list.min().to_list()
     assert result[0] == 2
diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py
index d1f82ce622..e68a1720e7 100644
--- a/tests/expr_and_series/list/sum_test.py
+++ b/tests/expr_and_series/list/sum_test.py
@@ -5,6 +5,7 @@
 import pytest
 
 import narwhals as nw
+from tests.utils import PANDAS_VERSION
 
 if TYPE_CHECKING:
     from tests.utils import Constructor, ConstructorEager
@@ -18,6 +19,10 @@ def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> N
     ):
         # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
         request.applymarker(pytest.mark.xfail)
+    if "pandas" in str(constructor):
+        if PANDAS_VERSION < (2, 2):
+            pytest.skip()
+        pytest.importorskip("pyarrow")
     result = (
         nw.from_native(constructor(data))
         .select(nw.col("a").cast(nw.List(nw.Int32())).list.sum())
@@ -34,6 +39,10 @@ def test_sum_series(
 ) -> None:
     if any(backend in str(constructor_eager) for backend in ("modin", "cudf")):
         request.applymarker(pytest.mark.xfail)
+    if "pandas" in str(constructor_eager):
+        if PANDAS_VERSION < (2, 2):
+            pytest.skip()
+        pytest.importorskip("pyarrow")
     df = nw.from_native(constructor_eager(data), eager_only=True)
     result = df["a"].cast(nw.List(nw.Int32())).list.sum().to_list()
     assert result[0] == 11

From 9b4555537ae20d987d8432d69ea6431837fe6a83 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Fri, 28 Nov 2025 11:38:00 +0000
Subject: [PATCH 03/34] add the new methods to the polars list namespace

---
 narwhals/_polars/utils.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/narwhals/_polars/utils.py b/narwhals/_polars/utils.py
index d638c791fd..f2081f99dc 100644
--- a/narwhals/_polars/utils.py
+++ b/narwhals/_polars/utils.py
@@ -362,6 +362,16 @@ def len(self) -> CompliantT: ...
 
     unique: Method[CompliantT]
 
+    max: Method[CompliantT]
+
+    mean: Method[CompliantT]
+
+    median: Method[CompliantT]
+
+    min: Method[CompliantT]
+
+    sum: Method[CompliantT]
+
 
 class PolarsStructNamespace(PolarsAnyNamespace[CompliantT, NativeT_co]):
     _accessor: ClassVar[Accessor] = "struct"

From 7bdd2d2c4a6cc3c95b327b8ad44e791e8af16ce4 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Fri, 28 Nov 2025 11:53:25 +0000
Subject: [PATCH 04/34] xfail old polars for median and pyspark for non
 implemented methods

---
 tests/expr_and_series/list/mean_test.py   |  3 ++-
 tests/expr_and_series/list/median_test.py | 10 ++++++----
 tests/expr_and_series/list/sum_test.py    |  3 ++-
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/tests/expr_and_series/list/mean_test.py b/tests/expr_and_series/list/mean_test.py
index 4f05090b81..ba77450fb9 100644
--- a/tests/expr_and_series/list/mean_test.py
+++ b/tests/expr_and_series/list/mean_test.py
@@ -15,7 +15,8 @@
 
 def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
     if any(
-        backend in str(constructor) for backend in ("dask", "modin", "cudf", "sqlframe")
+        backend in str(constructor)
+        for backend in ("dask", "modin", "cudf", "sqlframe", "pyspark")
     ):
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor):
diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py
index 760f817ba7..d08516551d 100644
--- a/tests/expr_and_series/list/median_test.py
+++ b/tests/expr_and_series/list/median_test.py
@@ -5,7 +5,7 @@
 import pytest
 
 import narwhals as nw
-from tests.utils import PANDAS_VERSION
+from tests.utils import PANDAS_VERSION, POLARS_VERSION
 
 if TYPE_CHECKING:
     from tests.utils import Constructor, ConstructorEager
@@ -16,8 +16,8 @@
 def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
     if any(
         backend in str(constructor)
-        for backend in ("dask", "modin", "cudf", "sqlframe", "ibis")
-    ):
+        for backend in ("dask", "modin", "cudf", "sqlframe", "ibis", "pyspark")
+    ) or ("polars" in str(constructor) and POLARS_VERSION < (0, 20, 7)):
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor):
         if PANDAS_VERSION < (2, 2):
@@ -37,7 +37,9 @@ def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -
 def test_median_series(
     request: pytest.FixtureRequest, constructor_eager: ConstructorEager
 ) -> None:
-    if any(backend in str(constructor_eager) for backend in ("modin", "cudf")):
+    if any(backend in str(constructor_eager) for backend in ("modin", "cudf")) or (
+        "polars" in str(constructor_eager) and POLARS_VERSION < (0, 20, 7)
+    ):
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor_eager):
         if PANDAS_VERSION < (2, 2):
diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py
index e68a1720e7..9760a09c71 100644
--- a/tests/expr_and_series/list/sum_test.py
+++ b/tests/expr_and_series/list/sum_test.py
@@ -15,7 +15,8 @@
 
 def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
     if any(
-        backend in str(constructor) for backend in ("dask", "modin", "cudf", "sqlframe")
+        backend in str(constructor)
+        for backend in ("dask", "modin", "cudf", "sqlframe", "pyspark")
     ):
         # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
         request.applymarker(pytest.mark.xfail)

From 74934c0c8dbea3caece6904fc63628e13ce73906 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Fri, 28 Nov 2025 11:56:52 +0000
Subject: [PATCH 05/34] unxfail modin

---
 tests/expr_and_series/list/max_test.py    | 6 ++----
 tests/expr_and_series/list/mean_test.py   | 5 ++---
 tests/expr_and_series/list/median_test.py | 4 ++--
 tests/expr_and_series/list/min_test.py    | 4 ++--
 tests/expr_and_series/list/sum_test.py    | 5 ++---
 5 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/tests/expr_and_series/list/max_test.py b/tests/expr_and_series/list/max_test.py
index 7b9fe5ac4a..8f112410cd 100644
--- a/tests/expr_and_series/list/max_test.py
+++ b/tests/expr_and_series/list/max_test.py
@@ -14,9 +14,7 @@
 
 
 def test_max_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
-    if any(
-        backend in str(constructor) for backend in ("dask", "modin", "cudf", "sqlframe")
-    ):
+    if any(backend in str(constructor) for backend in ("dask", "cudf", "sqlframe")):
         # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor):
@@ -37,7 +35,7 @@ def test_max_expr(request: pytest.FixtureRequest, constructor: Constructor) -> N
 def test_max_series(
     request: pytest.FixtureRequest, constructor_eager: ConstructorEager
 ) -> None:
-    if any(backend in str(constructor_eager) for backend in ("modin", "cudf")):
+    if any(backend in str(constructor_eager) for backend in ("cudf",)):
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor_eager):
         if PANDAS_VERSION < (2, 2):
diff --git a/tests/expr_and_series/list/mean_test.py b/tests/expr_and_series/list/mean_test.py
index ba77450fb9..cbe09f1b64 100644
--- a/tests/expr_and_series/list/mean_test.py
+++ b/tests/expr_and_series/list/mean_test.py
@@ -15,8 +15,7 @@
 
 def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
     if any(
-        backend in str(constructor)
-        for backend in ("dask", "modin", "cudf", "sqlframe", "pyspark")
+        backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "pyspark")
     ):
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor):
@@ -37,7 +36,7 @@ def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) ->
 def test_mean_series(
     request: pytest.FixtureRequest, constructor_eager: ConstructorEager
 ) -> None:
-    if any(backend in str(constructor_eager) for backend in ("modin", "cudf")):
+    if any(backend in str(constructor_eager) for backend in ("cudf",)):
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor_eager):
         if PANDAS_VERSION < (2, 2):
diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py
index d08516551d..d9e577f5ad 100644
--- a/tests/expr_and_series/list/median_test.py
+++ b/tests/expr_and_series/list/median_test.py
@@ -16,7 +16,7 @@
 def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
     if any(
         backend in str(constructor)
-        for backend in ("dask", "modin", "cudf", "sqlframe", "ibis", "pyspark")
+        for backend in ("dask", "cudf", "sqlframe", "ibis", "pyspark")
     ) or ("polars" in str(constructor) and POLARS_VERSION < (0, 20, 7)):
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor):
@@ -37,7 +37,7 @@ def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -
 def test_median_series(
     request: pytest.FixtureRequest, constructor_eager: ConstructorEager
 ) -> None:
-    if any(backend in str(constructor_eager) for backend in ("modin", "cudf")) or (
+    if any(backend in str(constructor_eager) for backend in ("cudf",)) or (
         "polars" in str(constructor_eager) and POLARS_VERSION < (0, 20, 7)
     ):
         request.applymarker(pytest.mark.xfail)
diff --git a/tests/expr_and_series/list/min_test.py b/tests/expr_and_series/list/min_test.py
index ab98f066b6..d2655c00a1 100644
--- a/tests/expr_and_series/list/min_test.py
+++ b/tests/expr_and_series/list/min_test.py
@@ -14,7 +14,7 @@
 
 
 def test_min_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
-    if any(backend in str(constructor) for backend in ("dask", "modin", "cudf")):
+    if any(backend in str(constructor) for backend in ("dask", "cudf")):
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor):
         if PANDAS_VERSION < (2, 2):
@@ -34,7 +34,7 @@ def test_min_expr(request: pytest.FixtureRequest, constructor: Constructor) -> N
 def test_min_series(
     request: pytest.FixtureRequest, constructor_eager: ConstructorEager
 ) -> None:
-    if any(backend in str(constructor_eager) for backend in ("modin", "cudf")):
+    if any(backend in str(constructor_eager) for backend in ("cudf",)):
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor_eager):
         if PANDAS_VERSION < (2, 2):
diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py
index 9760a09c71..fedd51d639 100644
--- a/tests/expr_and_series/list/sum_test.py
+++ b/tests/expr_and_series/list/sum_test.py
@@ -15,8 +15,7 @@
 
 def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
     if any(
-        backend in str(constructor)
-        for backend in ("dask", "modin", "cudf", "sqlframe", "pyspark")
+        backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "pyspark")
     ):
         # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
         request.applymarker(pytest.mark.xfail)
@@ -38,7 +37,7 @@ def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> N
 def test_sum_series(
     request: pytest.FixtureRequest, constructor_eager: ConstructorEager
 ) -> None:
-    if any(backend in str(constructor_eager) for backend in ("modin", "cudf")):
+    if any(backend in str(constructor_eager) for backend in ("cudf",)):
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor_eager):
         if PANDAS_VERSION < (2, 2):

From 7efe2d22607fba4919138a136cd3ef9978af60b0 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Fri, 28 Nov 2025 12:04:55 +0000
Subject: [PATCH 06/34] add no cover for non-pyarrow backends

---
 narwhals/_pandas_like/series_list.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/narwhals/_pandas_like/series_list.py b/narwhals/_pandas_like/series_list.py
index 04f424e14d..d1fb085262 100644
--- a/narwhals/_pandas_like/series_list.py
+++ b/narwhals/_pandas_like/series_list.py
@@ -49,7 +49,7 @@ def _agg(
         dtype_backend = get_dtype_backend(
             self.native.dtype, self.compliant._implementation
         )
-        if dtype_backend != "pyarrow":
+        if dtype_backend != "pyarrow":  # pragma: no cover
             msg = "Only pyarrow backend is currently supported."
             raise NotImplementedError(msg)
 

From 8e04fc1ec19ea1f623487982b9bfa032c2b37b37 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Fri, 28 Nov 2025 12:24:30 +0000
Subject: [PATCH 07/34] link pyspark and ibis issues

---
 tests/expr_and_series/list/mean_test.py   | 1 +
 tests/expr_and_series/list/median_test.py | 2 ++
 tests/expr_and_series/list/sum_test.py    | 2 +-
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/expr_and_series/list/mean_test.py b/tests/expr_and_series/list/mean_test.py
index cbe09f1b64..f707a3183a 100644
--- a/tests/expr_and_series/list/mean_test.py
+++ b/tests/expr_and_series/list/mean_test.py
@@ -17,6 +17,7 @@ def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) ->
     if any(
         backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "pyspark")
     ):
+        # PySpark issue: https://issues.apache.org/jira/browse/SPARK-54382
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor):
         if PANDAS_VERSION < (2, 2):
diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py
index d9e577f5ad..6c0f863782 100644
--- a/tests/expr_and_series/list/median_test.py
+++ b/tests/expr_and_series/list/median_test.py
@@ -18,6 +18,8 @@ def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -
         backend in str(constructor)
         for backend in ("dask", "cudf", "sqlframe", "ibis", "pyspark")
     ) or ("polars" in str(constructor) and POLARS_VERSION < (0, 20, 7)):
+        # PySpark issue: https://issues.apache.org/jira/browse/SPARK-54382
+        # ibis issue: https://github.com/ibis-project/ibis/issues/11788
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor):
         if PANDAS_VERSION < (2, 2):
diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py
index fedd51d639..0ddbe4c9b3 100644
--- a/tests/expr_and_series/list/sum_test.py
+++ b/tests/expr_and_series/list/sum_test.py
@@ -17,7 +17,7 @@ def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> N
     if any(
         backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "pyspark")
     ):
-        # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
+        # PySpark issue: https://issues.apache.org/jira/browse/SPARK-54382
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor):
         if PANDAS_VERSION < (2, 2):

From 7ab1ebf5bd536cc687b32bda277cb98f16072c98 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Fri, 28 Nov 2025 14:20:45 +0000
Subject: [PATCH 08/34] add sum/mean/median for PySpark

---
 narwhals/_spark_like/expr_list.py         | 32 ++++++++++++++++++++---
 tests/expr_and_series/list/mean_test.py   |  6 ++---
 tests/expr_and_series/list/median_test.py |  5 ++--
 tests/expr_and_series/list/sum_test.py    |  6 ++---
 4 files changed, 34 insertions(+), 15 deletions(-)

diff --git a/narwhals/_spark_like/expr_list.py b/narwhals/_spark_like/expr_list.py
index f8ee83dfe5..3a888f88a7 100644
--- a/narwhals/_spark_like/expr_list.py
+++ b/narwhals/_spark_like/expr_list.py
@@ -1,10 +1,10 @@
 from __future__ import annotations
 
+import operator
 from typing import TYPE_CHECKING
 
 from narwhals._compliant import LazyExprNamespace
 from narwhals._compliant.any_namespace import ListNamespace
-from narwhals._utils import not_implemented
 
 if TYPE_CHECKING:
     from sqlframe.base.column import Column
@@ -49,6 +49,30 @@ def func(expr: Column) -> Column:
 
         return self.compliant._with_elementwise(func)
 
-    mean = not_implemented()
-    median = not_implemented()
-    sum = not_implemented()
+    def sum(self) -> SparkLikeExpr:
+        def func(expr: Column) -> Column:
+            F = self.compliant._F
+            return F.aggregate(F.array_compact(expr), F.lit(0.0), operator.add)
+
+        return self.compliant._with_elementwise(func)
+
+    def mean(self) -> SparkLikeExpr:
+        def func(expr: Column) -> Column:
+            F = self.compliant._F
+            return F.aggregate(
+                F.array_compact(expr), F.lit(0.0), operator.add
+            ) / F.array_size(F.array_compact(expr))
+
+        return self.compliant._with_elementwise(func)
+
+    def median(self) -> SparkLikeExpr:
+        def func(expr: Column) -> Column:
+            F = self.compliant._F
+            sorted_expr = F.array_compact(F.sort_array(expr))
+            size = F.array_size(sorted_expr)
+            mid_index = (size / 2).cast("int")  # 0-based; upper middle if size is even
+            odd_case = sorted_expr[mid_index]
+            even_case = (sorted_expr[mid_index - 1] + sorted_expr[mid_index]) / 2
+            return F.when(size % 2 == 1, odd_case).otherwise(even_case)
+
+        return self.compliant._with_elementwise(func)
diff --git a/tests/expr_and_series/list/mean_test.py b/tests/expr_and_series/list/mean_test.py
index f707a3183a..1c0551df71 100644
--- a/tests/expr_and_series/list/mean_test.py
+++ b/tests/expr_and_series/list/mean_test.py
@@ -14,10 +14,8 @@
 
 
 def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
-    if any(
-        backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "pyspark")
-    ):
-        # PySpark issue: https://issues.apache.org/jira/browse/SPARK-54382
+    if any(backend in str(constructor) for backend in ("dask", "cudf", "sqlframe")):
+        # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor):
         if PANDAS_VERSION < (2, 2):
diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py
index 6c0f863782..256e1f8111 100644
--- a/tests/expr_and_series/list/median_test.py
+++ b/tests/expr_and_series/list/median_test.py
@@ -15,10 +15,9 @@
 
 def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
     if any(
-        backend in str(constructor)
-        for backend in ("dask", "cudf", "sqlframe", "ibis", "pyspark")
+        backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "ibis")
     ) or ("polars" in str(constructor) and POLARS_VERSION < (0, 20, 7)):
-        # PySpark issue: https://issues.apache.org/jira/browse/SPARK-54382
+        # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
         # ibis issue: https://github.com/ibis-project/ibis/issues/11788
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor):
diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py
index 0ddbe4c9b3..d72b321164 100644
--- a/tests/expr_and_series/list/sum_test.py
+++ b/tests/expr_and_series/list/sum_test.py
@@ -14,10 +14,8 @@
 
 
 def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
-    if any(
-        backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "pyspark")
-    ):
-        # PySpark issue: https://issues.apache.org/jira/browse/SPARK-54382
+    if any(backend in str(constructor) for backend in ("dask", "cudf", "sqlframe")):
+        # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor):
         if PANDAS_VERSION < (2, 2):

From 6b5780938a82afcb1564b58a8047a3526f1b917a Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Fri, 28 Nov 2025 14:30:44 +0000
Subject: [PATCH 09/34] xfail pyspark[connect], add no cover for sqlframe

---
 narwhals/_spark_like/expr_list.py         | 3 ++-
 tests/expr_and_series/list/median_test.py | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/narwhals/_spark_like/expr_list.py b/narwhals/_spark_like/expr_list.py
index 3a888f88a7..454e088244 100644
--- a/narwhals/_spark_like/expr_list.py
+++ b/narwhals/_spark_like/expr_list.py
@@ -66,7 +66,8 @@ def func(expr: Column) -> Column:
         return self.compliant._with_elementwise(func)
 
     def median(self) -> SparkLikeExpr:
-        def func(expr: Column) -> Column:
+        def func(expr: Column) -> Column:  # pragma: no cover
+            # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
             F = self.compliant._F
             sorted_expr = F.array_compact(F.sort_array(expr))
             size = F.array_size(sorted_expr)
diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py
index 256e1f8111..2af71c8083 100644
--- a/tests/expr_and_series/list/median_test.py
+++ b/tests/expr_and_series/list/median_test.py
@@ -15,7 +15,8 @@
 
 def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
     if any(
-        backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "ibis")
+        backend in str(constructor)
+        for backend in ("dask", "cudf", "sqlframe", "ibis", "pyspark[connect]")
     ) or ("polars" in str(constructor) and POLARS_VERSION < (0, 20, 7)):
         # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
         # ibis issue: https://github.com/ibis-project/ibis/issues/11788

From 7dfc9fa9e87918f563e3eb8cffee5e5b59639136 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Fri, 28 Nov 2025 14:40:50 +0000
Subject: [PATCH 10/34] xfail pyspark connect

---
 tests/expr_and_series/list/median_test.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py
index 2af71c8083..b15c674635 100644
--- a/tests/expr_and_series/list/median_test.py
+++ b/tests/expr_and_series/list/median_test.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import os
 from typing import TYPE_CHECKING
 
 import pytest
@@ -15,12 +16,13 @@
 
 def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
     if any(
-        backend in str(constructor)
-        for backend in ("dask", "cudf", "sqlframe", "ibis", "pyspark[connect]")
+        backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "ibis")
     ) or ("polars" in str(constructor) and POLARS_VERSION < (0, 20, 7)):
         # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
         # ibis issue: https://github.com/ibis-project/ibis/issues/11788
         request.applymarker(pytest.mark.xfail)
+    if os.environ.get("SPARK_CONNECT", None) and "pyspark" in str(constructor):
+        request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor):
         if PANDAS_VERSION < (2, 2):
             pytest.skip()

From 1243980ed0d1b638ab9bdc702be5065334a38007 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Sat, 29 Nov 2025 12:25:15 +0000
Subject: [PATCH 11/34] handle empty lists for pyarrow, tests for empty lists

---
 narwhals/_arrow/utils.py               |  6 ++++--
 tests/expr_and_series/list/sum_test.py | 23 ++++++++++++++++-------
 tests/expr_and_series/lit_test.py      |  2 +-
 3 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py
index 6ca76eb239..94f046ffd8 100644
--- a/narwhals/_arrow/utils.py
+++ b/narwhals/_arrow/utils.py
@@ -501,12 +501,14 @@ def list_agg(
     array: ChunkedArrayAny,
     func: Literal["min", "max", "mean", "approximate_median", "sum"],
 ) -> ChunkedArrayAny:
-    return (
+    agg = pa.array(
         pa.Table.from_arrays(
             [pc.list_flatten(array), pc.list_parent_indices(array)],
             names=["values", "offsets"],
         )
         .group_by("offsets")
-        .aggregate([("values", func)])
+        .aggregate([("values", func, pc.CountOptions("all"))])
         .column(f"values_{func}")
     )
+    non_empty_mask = pa.array(pc.not_equal(pc.list_value_length(array), 0))  # type: ignore[type-var]
+    return pa.chunked_array([pc.replace_with_mask([0] * len(array), non_empty_mask, agg)])
diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py
index d72b321164..ef804e84ce 100644
--- a/tests/expr_and_series/list/sum_test.py
+++ b/tests/expr_and_series/list/sum_test.py
@@ -10,10 +10,15 @@
 if TYPE_CHECKING:
     from tests.utils import Constructor, ConstructorEager
 
-data = {"a": [[3, 2, 2, 4, None], [-1]]}
+data = {"a": [[3, None, 2, 2, 4, None], [], [-1], [None, None, None], []]}
 
 
-def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
+@pytest.mark.parametrize(
+    ("index", "expected"), [(0, 11), (1, 0), (2, -1), (3, 0), (4, 0)]
+)
+def test_sum_expr(
+    request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: int
+) -> None:
     if any(backend in str(constructor) for backend in ("dask", "cudf", "sqlframe")):
         # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
         request.applymarker(pytest.mark.xfail)
@@ -28,12 +33,17 @@ def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> N
         .collect()["a"]
         .to_list()
     )
-    assert result[0] == 11
-    assert result[1] == -1
+    assert result[index] == expected
 
 
+@pytest.mark.parametrize(
+    ("index", "expected"), [(0, 11), (1, 0), (2, -1), (3, 0), (4, 0)]
+)
 def test_sum_series(
-    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
+    request: pytest.FixtureRequest,
+    constructor_eager: ConstructorEager,
+    index: int,
+    expected: int,
 ) -> None:
     if any(backend in str(constructor_eager) for backend in ("cudf",)):
         request.applymarker(pytest.mark.xfail)
@@ -43,5 +53,4 @@ def test_sum_series(
         pytest.importorskip("pyarrow")
     df = nw.from_native(constructor_eager(data), eager_only=True)
     result = df["a"].cast(nw.List(nw.Int32())).list.sum().to_list()
-    assert result[0] == 11
-    assert result[1] == -1
+    assert result[index] == expected
diff --git a/tests/expr_and_series/lit_test.py b/tests/expr_and_series/lit_test.py
index 4a23ab9629..f742d46092 100644
--- a/tests/expr_and_series/lit_test.py
+++ b/tests/expr_and_series/lit_test.py
@@ -19,7 +19,7 @@
     from narwhals.dtypes import DType
 
 
-@pytest.mark.parametrize(
+@pytest.mark.rize(
     ("dtype", "expected_lit"),
     [(None, [2, 2, 2]), (nw.String, ["2", "2", "2"]), (nw.Float32, [2.0, 2.0, 2.0])],
 )

From 7eca29a6b41aca4df06ad4338bc9e10479d0cdb8 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Sat, 29 Nov 2025 12:38:07 +0000
Subject: [PATCH 12/34] undo typo

---
 tests/expr_and_series/lit_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/expr_and_series/lit_test.py b/tests/expr_and_series/lit_test.py
index f742d46092..4a23ab9629 100644
--- a/tests/expr_and_series/lit_test.py
+++ b/tests/expr_and_series/lit_test.py
@@ -19,7 +19,7 @@
     from narwhals.dtypes import DType
 
 
-@pytest.mark.rize(
+@pytest.mark.parametrize(
     ("dtype", "expected_lit"),
     [(None, [2, 2, 2]), (nw.String, ["2", "2", "2"]), (nw.Float32, [2.0, 2.0, 2.0])],
 )

From 545abf8ecc5da2e25c5f6bbf54527f06c6b421cc Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Sat, 29 Nov 2025 14:35:33 +0000
Subject: [PATCH 13/34] add None case

---
 narwhals/_arrow/utils.py               | 5 ++++-
 tests/expr_and_series/list/sum_test.py | 6 +++---
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py
index 94f046ffd8..a9d702d01b 100644
--- a/narwhals/_arrow/utils.py
+++ b/narwhals/_arrow/utils.py
@@ -511,4 +511,7 @@ def list_agg(
         .column(f"values_{func}")
     )
     non_empty_mask = pa.array(pc.not_equal(pc.list_value_length(array), 0))  # type: ignore[type-var]
-    return pa.chunked_array([pc.replace_with_mask([0] * len(array), non_empty_mask, agg)])
+    base_array = [None if x else 0 for x in non_empty_mask.is_null()]
+    return pa.chunked_array(
+        [pc.replace_with_mask(base_array, non_empty_mask.fill_null(False), agg)]
+    )
diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py
index ef804e84ce..f206d75911 100644
--- a/tests/expr_and_series/list/sum_test.py
+++ b/tests/expr_and_series/list/sum_test.py
@@ -10,11 +10,11 @@
 if TYPE_CHECKING:
     from tests.utils import Constructor, ConstructorEager
 
-data = {"a": [[3, None, 2, 2, 4, None], [], [-1], [None, None, None], []]}
+data = {"a": [[3, None, 2, 2, 4, None], [], [-1], None, [None, None, None], []]}
 
 
 @pytest.mark.parametrize(
-    ("index", "expected"), [(0, 11), (1, 0), (2, -1), (3, 0), (4, 0)]
+    ("index", "expected"), [(0, 11), (1, 0), (2, -1), (3, None), (4, 0), (5, 0)]
 )
 def test_sum_expr(
     request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: int
@@ -37,7 +37,7 @@ def test_sum_expr(
 
 
 @pytest.mark.parametrize(
-    ("index", "expected"), [(0, 11), (1, 0), (2, -1), (3, 0), (4, 0)]
+    ("index", "expected"), [(0, 11), (1, 0), (2, -1), (3, None), (4, 0), (5, 0)]
 )
 def test_sum_series(
     request: pytest.FixtureRequest,

From 74de5c635c36e32a80613daab9a28979046adcd7 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Sat, 29 Nov 2025 15:44:02 +0000
Subject: [PATCH 14/34] fix list_agg and tests

---
 narwhals/_arrow/utils.py               | 7 +++++--
 tests/expr_and_series/list/sum_test.py | 6 +++---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py
index a9d702d01b..e968ad548d 100644
--- a/narwhals/_arrow/utils.py
+++ b/narwhals/_arrow/utils.py
@@ -507,11 +507,14 @@ def list_agg(
             names=["values", "offsets"],
         )
         .group_by("offsets")
-        .aggregate([("values", func, pc.CountOptions("all"))])
+        .aggregate([("values", func)])
+        .sort_by("offsets")
         .column(f"values_{func}")
     )
+    if func == "sum":
+        agg = agg.fill_null(lit(0))  # type: ignore[type-var]
     non_empty_mask = pa.array(pc.not_equal(pc.list_value_length(array), 0))  # type: ignore[type-var]
-    base_array = [None if x else 0 for x in non_empty_mask.is_null()]
+    base_array = pc.if_else(non_empty_mask.is_null(), None, 0)
     return pa.chunked_array(
         [pc.replace_with_mask(base_array, non_empty_mask.fill_null(False), agg)]
     )
diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py
index f206d75911..9b97815c12 100644
--- a/tests/expr_and_series/list/sum_test.py
+++ b/tests/expr_and_series/list/sum_test.py
@@ -5,7 +5,7 @@
 import pytest
 
 import narwhals as nw
-from tests.utils import PANDAS_VERSION
+from tests.utils import PANDAS_VERSION, assert_equal_data
 
 if TYPE_CHECKING:
     from tests.utils import Constructor, ConstructorEager
@@ -33,7 +33,7 @@ def test_sum_expr(
         .collect()["a"]
         .to_list()
     )
-    assert result[index] == expected
+    assert_equal_data({"a": [result[index]]}, {"a": [expected]})
 
 
 @pytest.mark.parametrize(
@@ -53,4 +53,4 @@ def test_sum_series(
         pytest.importorskip("pyarrow")
     df = nw.from_native(constructor_eager(data), eager_only=True)
     result = df["a"].cast(nw.List(nw.Int32())).list.sum().to_list()
-    assert result[index] == expected
+    assert_equal_data({"a": [result[index]]}, {"a": [expected]})

From 65072ffe31884f85c6e110a52dee5d6695fb7ce8 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Sun, 30 Nov 2025 11:21:20 +0000
Subject: [PATCH 15/34] adjust duckdb

---
 narwhals/_duckdb/expr_list.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/narwhals/_duckdb/expr_list.py b/narwhals/_duckdb/expr_list.py
index 184f57252f..d20589e9ea 100644
--- a/narwhals/_duckdb/expr_list.py
+++ b/narwhals/_duckdb/expr_list.py
@@ -4,7 +4,7 @@
 
 from narwhals._compliant import LazyExprNamespace
 from narwhals._compliant.any_namespace import ListNamespace
-from narwhals._duckdb.utils import F, lit, when
+from narwhals._duckdb.utils import F, col, lambda_expr, lit, when
 from narwhals._utils import requires
 
 if TYPE_CHECKING:
@@ -54,4 +54,12 @@ def median(self) -> DuckDBExpr:
         return self.compliant._with_elementwise(lambda expr: F("list_median", expr))
 
     def sum(self) -> DuckDBExpr:
-        return self.compliant._with_elementwise(lambda expr: F("list_sum", expr))
+        def func(expr: Expression) -> Expression:
+            elem = col("_")
+            expr_no_nulls = F("list_filter", expr, lambda_expr(elem, elem.isnotnull()))
+            expr_sum = F("list_sum", expr_no_nulls)
+            return when(F("array_length", expr_no_nulls) == lit(0), lit(0)).otherwise(
+                expr_sum
+            )
+
+        return self.compliant._with_callable(func)

From ca4c7944c8c65be3136647b9effbb8cb11a0c80c Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Sun, 30 Nov 2025 12:23:22 +0000
Subject: [PATCH 16/34] adjust pyarrow and tests

---
 narwhals/_arrow/utils.py                  | 12 ++++++++---
 tests/expr_and_series/list/max_test.py    | 25 +++++++++++++++--------
 tests/expr_and_series/list/mean_test.py   | 25 +++++++++++++++--------
 tests/expr_and_series/list/median_test.py | 25 +++++++++++++++--------
 tests/expr_and_series/list/min_test.py    | 25 +++++++++++++++--------
 tests/expr_and_series/list/sum_test.py    |  6 +++---
 6 files changed, 80 insertions(+), 38 deletions(-)

diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py
index e968ad548d..f3bd421283 100644
--- a/narwhals/_arrow/utils.py
+++ b/narwhals/_arrow/utils.py
@@ -511,10 +511,16 @@ def list_agg(
         .sort_by("offsets")
         .column(f"values_{func}")
     )
+    non_empty_mask = pa.array(pc.not_equal(pc.list_value_length(array), 0))  # type: ignore[type-var]
     if func == "sum":
         agg = agg.fill_null(lit(0))  # type: ignore[type-var]
-    non_empty_mask = pa.array(pc.not_equal(pc.list_value_length(array), 0))  # type: ignore[type-var]
-    base_array = pc.if_else(non_empty_mask.is_null(), None, 0)
+        base_array = pc.if_else(non_empty_mask.is_null(), None, 0)
+    else:
+        base_array = pc.if_else(non_empty_mask, 0, None)
     return pa.chunked_array(
-        [pc.replace_with_mask(base_array, non_empty_mask.fill_null(False), agg)]
+        [
+            pc.replace_with_mask(
+                base_array.cast(agg.type), non_empty_mask.fill_null(False), agg
+            )
+        ]
     )
diff --git a/tests/expr_and_series/list/max_test.py b/tests/expr_and_series/list/max_test.py
index 8f112410cd..6b79c75c5e 100644
--- a/tests/expr_and_series/list/max_test.py
+++ b/tests/expr_and_series/list/max_test.py
@@ -5,15 +5,20 @@
 import pytest
 
 import narwhals as nw
-from tests.utils import PANDAS_VERSION
+from tests.utils import PANDAS_VERSION, assert_equal_data
 
 if TYPE_CHECKING:
     from tests.utils import Constructor, ConstructorEager
 
-data = {"a": [[3, 2, 2, 4, None], [-1]]}
+data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]}
 
 
-def test_max_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
+@pytest.mark.parametrize(
+    ("index", "expected"), [(0, 4), (1, -1), (2, None), (3, None), (4, None)]
+)
+def test_max_expr(
+    request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: int
+) -> None:
     if any(backend in str(constructor) for backend in ("dask", "cudf", "sqlframe")):
         # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
         request.applymarker(pytest.mark.xfail)
@@ -28,12 +33,17 @@ def test_max_expr(request: pytest.FixtureRequest, constructor: Constructor) -> N
         .collect()["a"]
         .to_list()
     )
-    assert result[0] == 4
-    assert result[1] == -1
+    assert_equal_data({"a": [result[index]]}, {"a": [expected]})
 
 
+@pytest.mark.parametrize(
+    ("index", "expected"), [(0, 4), (1, -1), (2, None), (3, None), (4, None)]
+)
 def test_max_series(
-    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
+    request: pytest.FixtureRequest,
+    constructor_eager: ConstructorEager,
+    index: int,
+    expected: int,
 ) -> None:
     if any(backend in str(constructor_eager) for backend in ("cudf",)):
         request.applymarker(pytest.mark.xfail)
@@ -43,5 +53,4 @@ def test_max_series(
         pytest.importorskip("pyarrow")
     df = nw.from_native(constructor_eager(data), eager_only=True)
     result = df["a"].cast(nw.List(nw.Int32())).list.max().to_list()
-    assert result[0] == 4
-    assert result[1] == -1
+    assert_equal_data({"a": [result[index]]}, {"a": [expected]})
diff --git a/tests/expr_and_series/list/mean_test.py b/tests/expr_and_series/list/mean_test.py
index 1c0551df71..d2c42be932 100644
--- a/tests/expr_and_series/list/mean_test.py
+++ b/tests/expr_and_series/list/mean_test.py
@@ -5,15 +5,20 @@
 import pytest
 
 import narwhals as nw
-from tests.utils import PANDAS_VERSION
+from tests.utils import PANDAS_VERSION, assert_equal_data
 
 if TYPE_CHECKING:
     from tests.utils import Constructor, ConstructorEager
 
-data = {"a": [[3, 2, 2, 4, None], [-1]]}
+data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]}
 
 
-def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
+@pytest.mark.parametrize(
+    ("index", "expected"), [(0, 2.75), (1, -1), (2, None), (3, None), (4, None)]
+)
+def test_mean_expr(
+    request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: float
+) -> None:
     if any(backend in str(constructor) for backend in ("dask", "cudf", "sqlframe")):
         # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
         request.applymarker(pytest.mark.xfail)
@@ -28,12 +33,17 @@ def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) ->
         .collect()["a"]
         .to_list()
     )
-    assert result[0] == 2.75
-    assert result[1] == -1
+    assert_equal_data({"a": [result[index]]}, {"a": [expected]})
 
 
+@pytest.mark.parametrize(
+    ("index", "expected"), [(0, 2.75), (1, -1), (2, None), (3, None), (4, None)]
+)
 def test_mean_series(
-    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
+    request: pytest.FixtureRequest,
+    constructor_eager: ConstructorEager,
+    index: int,
+    expected: float,
 ) -> None:
     if any(backend in str(constructor_eager) for backend in ("cudf",)):
         request.applymarker(pytest.mark.xfail)
@@ -43,5 +53,4 @@ def test_mean_series(
         pytest.importorskip("pyarrow")
     df = nw.from_native(constructor_eager(data), eager_only=True)
     result = df["a"].cast(nw.List(nw.Int32())).list.mean().to_list()
-    assert result[0] == 2.75
-    assert result[1] == -1
+    assert_equal_data({"a": [result[index]]}, {"a": [expected]})
diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py
index b15c674635..e2e0003c94 100644
--- a/tests/expr_and_series/list/median_test.py
+++ b/tests/expr_and_series/list/median_test.py
@@ -6,15 +6,20 @@
 import pytest
 
 import narwhals as nw
-from tests.utils import PANDAS_VERSION, POLARS_VERSION
+from tests.utils import PANDAS_VERSION, POLARS_VERSION, assert_equal_data
 
 if TYPE_CHECKING:
     from tests.utils import Constructor, ConstructorEager
 
-data = {"a": [[3, 2, 2, 4, None], [-1]]}
+data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]}
 
 
-def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
+@pytest.mark.parametrize(
+    ("index", "expected"), [(0, 2.5), (1, -1), (2, None), (3, None), (4, None)]
+)
+def test_median_expr(
+    request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: float
+) -> None:
     if any(
         backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "ibis")
     ) or ("polars" in str(constructor) and POLARS_VERSION < (0, 20, 7)):
@@ -34,12 +39,17 @@ def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -
         .collect()["a"]
         .to_list()
     )
-    assert result[0] == 2.5
-    assert result[1] == -1
+    assert_equal_data({"a": [result[index]]}, {"a": [expected]})
 
 
+@pytest.mark.parametrize(
+    ("index", "expected"), [(0, 2.5), (1, -1), (2, None), (3, None), (4, None)]
+)
 def test_median_series(
-    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
+    request: pytest.FixtureRequest,
+    constructor_eager: ConstructorEager,
+    index: int,
+    expected: float,
 ) -> None:
     if any(backend in str(constructor_eager) for backend in ("cudf",)) or (
         "polars" in str(constructor_eager) and POLARS_VERSION < (0, 20, 7)
@@ -51,5 +61,4 @@ def test_median_series(
         pytest.importorskip("pyarrow")
     df = nw.from_native(constructor_eager(data), eager_only=True)
     result = df["a"].cast(nw.List(nw.Int32())).list.median().to_list()
-    assert result[0] == 2.5
-    assert result[1] == -1
+    assert_equal_data({"a": [result[index]]}, {"a": [expected]})
diff --git a/tests/expr_and_series/list/min_test.py b/tests/expr_and_series/list/min_test.py
index d2655c00a1..87d3fe1ca7 100644
--- a/tests/expr_and_series/list/min_test.py
+++ b/tests/expr_and_series/list/min_test.py
@@ -5,15 +5,20 @@
 import pytest
 
 import narwhals as nw
-from tests.utils import PANDAS_VERSION
+from tests.utils import PANDAS_VERSION, assert_equal_data
 
 if TYPE_CHECKING:
     from tests.utils import Constructor, ConstructorEager
 
-data = {"a": [[3, 2, 2, 4, None], [-1]]}
+data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]}
 
 
-def test_min_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
+@pytest.mark.parametrize(
+    ("index", "expected"), [(0, 2), (1, -1), (2, None), (3, None), (4, None)]
+)
+def test_min_expr(
+    request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: int
+) -> None:
     if any(backend in str(constructor) for backend in ("dask", "cudf")):
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor):
@@ -27,12 +32,17 @@ def test_min_expr(request: pytest.FixtureRequest, constructor: Constructor) -> N
         .collect()["a"]
         .to_list()
     )
-    assert result[0] == 2
-    assert result[1] == -1
+    assert_equal_data({"a": [result[index]]}, {"a": [expected]})
 
 
+@pytest.mark.parametrize(
+    ("index", "expected"), [(0, 2), (1, -1), (2, None), (3, None), (4, None)]
+)
 def test_min_series(
-    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
+    request: pytest.FixtureRequest,
+    constructor_eager: ConstructorEager,
+    index: int,
+    expected: int,
 ) -> None:
     if any(backend in str(constructor_eager) for backend in ("cudf",)):
         request.applymarker(pytest.mark.xfail)
@@ -42,5 +52,4 @@ def test_min_series(
         pytest.importorskip("pyarrow")
     df = nw.from_native(constructor_eager(data), eager_only=True)
     result = df["a"].cast(nw.List(nw.Int32())).list.min().to_list()
-    assert result[0] == 2
-    assert result[1] == -1
+    assert_equal_data({"a": [result[index]]}, {"a": [expected]})
diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py
index 9b97815c12..f3ab2a12d6 100644
--- a/tests/expr_and_series/list/sum_test.py
+++ b/tests/expr_and_series/list/sum_test.py
@@ -10,11 +10,11 @@
 if TYPE_CHECKING:
     from tests.utils import Constructor, ConstructorEager
 
-data = {"a": [[3, None, 2, 2, 4, None], [], [-1], None, [None, None, None], []]}
+data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]}
 
 
 @pytest.mark.parametrize(
-    ("index", "expected"), [(0, 11), (1, 0), (2, -1), (3, None), (4, 0), (5, 0)]
+    ("index", "expected"), [(0, 11), (1, -1), (2, None), (3, 0), (4, 0)]
 )
 def test_sum_expr(
     request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: int
@@ -37,7 +37,7 @@ def test_sum_expr(
 
 
 @pytest.mark.parametrize(
-    ("index", "expected"), [(0, 11), (1, 0), (2, -1), (3, None), (4, 0), (5, 0)]
+    ("index", "expected"), [(0, 11), (1, -1), (2, None), (3, 0), (4, 0)]
 )
 def test_sum_series(
     request: pytest.FixtureRequest,

From 3251865f53baf84915296070e1db72532bec0869 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Sun, 30 Nov 2025 12:38:35 +0000
Subject: [PATCH 17/34] add `try_divide` for pyspark mean, set min duckdb
 version for lambda_expr

---
 narwhals/_duckdb/expr_list.py     | 1 +
 narwhals/_spark_like/expr_list.py | 7 ++++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/narwhals/_duckdb/expr_list.py b/narwhals/_duckdb/expr_list.py
index d20589e9ea..15b3f70a53 100644
--- a/narwhals/_duckdb/expr_list.py
+++ b/narwhals/_duckdb/expr_list.py
@@ -53,6 +53,7 @@ def mean(self) -> DuckDBExpr:
     def median(self) -> DuckDBExpr:
         return self.compliant._with_elementwise(lambda expr: F("list_median", expr))
 
+    @requires.backend_version((1, 2))
     def sum(self) -> DuckDBExpr:
         def func(expr: Expression) -> Expression:
             elem = col("_")
diff --git a/narwhals/_spark_like/expr_list.py b/narwhals/_spark_like/expr_list.py
index 454e088244..2e5ee5a369 100644
--- a/narwhals/_spark_like/expr_list.py
+++ b/narwhals/_spark_like/expr_list.py
@@ -59,9 +59,10 @@ def func(expr: Column) -> Column:
     def mean(self) -> SparkLikeExpr:
         def func(expr: Column) -> Column:
             F = self.compliant._F
-            return F.aggregate(
-                F.array_compact(expr), F.lit(0.0), operator.add
-            ) / F.array_size(F.array_compact(expr))
+            return F.try_divide(
+                F.aggregate(F.array_compact(expr), F.lit(0.0), operator.add),
+                F.array_size(F.array_compact(expr)),
+            )
 
         return self.compliant._with_elementwise(func)
 

From eff085cd9cabaaa752e6155e922571abe9d2109f Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Sun, 30 Nov 2025 12:48:09 +0000
Subject: [PATCH 18/34] skip old duckdb for sum

---
 tests/expr_and_series/list/sum_test.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py
index f3ab2a12d6..90b78bac58 100644
--- a/tests/expr_and_series/list/sum_test.py
+++ b/tests/expr_and_series/list/sum_test.py
@@ -5,7 +5,7 @@
 import pytest
 
 import narwhals as nw
-from tests.utils import PANDAS_VERSION, assert_equal_data
+from tests.utils import DUCKDB_VERSION, PANDAS_VERSION, assert_equal_data
 
 if TYPE_CHECKING:
     from tests.utils import Constructor, ConstructorEager
@@ -26,6 +26,9 @@ def test_sum_expr(
         if PANDAS_VERSION < (2, 2):
             pytest.skip()
         pytest.importorskip("pyarrow")
+    if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 2):
+        reason = "version too old, duckdb 1.2 required for LambdaExpression."
+        pytest.skip(reason=reason)
     result = (
         nw.from_native(constructor(data))
         .select(nw.col("a").cast(nw.List(nw.Int32())).list.sum())

From fdcf3f3e823f42079ef4cb108870ce20c27a7277 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Sun, 30 Nov 2025 14:03:10 +0000
Subject: [PATCH 19/34] fix typing

---
 narwhals/_arrow/utils.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py
index f3bd421283..511ee43a60 100644
--- a/narwhals/_arrow/utils.py
+++ b/narwhals/_arrow/utils.py
@@ -511,7 +511,7 @@ def list_agg(
         .sort_by("offsets")
         .column(f"values_{func}")
     )
-    non_empty_mask = pa.array(pc.not_equal(pc.list_value_length(array), 0))  # type: ignore[type-var]
+    non_empty_mask = pa.array(pc.not_equal(pc.list_value_length(array), lit(0)))
     if func == "sum":
         agg = agg.fill_null(lit(0))  # type: ignore[type-var]
         base_array = pc.if_else(non_empty_mask.is_null(), None, 0)
@@ -520,7 +520,9 @@ def list_agg(
     return pa.chunked_array(
         [
             pc.replace_with_mask(
-                base_array.cast(agg.type), non_empty_mask.fill_null(False), agg
+                base_array.cast(agg.type),
+                non_empty_mask.fill_null(False),  # pyright:ignore[reportArgumentType]
+                agg,
             )
         ]
     )

From 146c458551d567f8dcfba746147803653a1cf512 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Sun, 30 Nov 2025 14:35:07 +0000
Subject: [PATCH 20/34] adjust pyspark median

---
 narwhals/_spark_like/expr_list.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/narwhals/_spark_like/expr_list.py b/narwhals/_spark_like/expr_list.py
index 2e5ee5a369..15f566131c 100644
--- a/narwhals/_spark_like/expr_list.py
+++ b/narwhals/_spark_like/expr_list.py
@@ -75,6 +75,10 @@ def func(expr: Column) -> Column:  # pragma: no cover
             mid_index = (size / 2).cast("int")
             odd_case = sorted_expr[mid_index]
             even_case = (sorted_expr[mid_index] - 1 + sorted_expr[mid_index]) / 2
-            return F.when(size % 2 == 1, odd_case).otherwise(even_case)
+            return (
+                F.when((size.isNull()) | (size == 0), F.lit(None))
+                .when(size % 2 == 1, odd_case)
+                .otherwise(even_case)
+            )
 
         return self.compliant._with_elementwise(func)

From 4d18654fc325ef9e305b83e3f77558468e78bf9b Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Sun, 30 Nov 2025 14:54:09 +0000
Subject: [PATCH 21/34] fix typing

---
 narwhals/_arrow/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py
index 511ee43a60..90204efcef 100644
--- a/narwhals/_arrow/utils.py
+++ b/narwhals/_arrow/utils.py
@@ -513,7 +513,7 @@ def list_agg(
     )
     non_empty_mask = pa.array(pc.not_equal(pc.list_value_length(array), lit(0)))
     if func == "sum":
-        agg = agg.fill_null(lit(0))  # type: ignore[type-var]
+        agg = agg.fill_null(lit(0))  # pyright:ignore[reportArgumentType]
         base_array = pc.if_else(non_empty_mask.is_null(), None, 0)
     else:
         base_array = pc.if_else(non_empty_mask, 0, None)
@@ -521,7 +521,7 @@ def list_agg(
         [
             pc.replace_with_mask(
                 base_array.cast(agg.type),
-                non_empty_mask.fill_null(False),  # pyright:ignore[reportArgumentType]
+                non_empty_mask.fill_null(False),  # type: ignore[arg-type]
                 agg,
             )
         ]

From 997533482fb44134bcc8289722a512509a7bdd4d Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Mon, 1 Dec 2025 10:28:04 +0000
Subject: [PATCH 22/34] adjust ibis sum

---
 narwhals/_ibis/expr_list.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/narwhals/_ibis/expr_list.py b/narwhals/_ibis/expr_list.py
index ff0cd76c08..64cc053831 100644
--- a/narwhals/_ibis/expr_list.py
+++ b/narwhals/_ibis/expr_list.py
@@ -2,6 +2,8 @@
 
 from typing import TYPE_CHECKING
 
+from ibis import cases, literal
+
 from narwhals._compliant import LazyExprNamespace
 from narwhals._compliant.any_namespace import ListNamespace
 from narwhals._utils import not_implemented
@@ -39,6 +41,15 @@ def mean(self) -> IbisExpr:
         return self.compliant._with_callable(lambda expr: expr.means())
 
     def sum(self) -> IbisExpr:
-        return self.compliant._with_callable(lambda expr: expr.sums())
+        def func(expr: ir.ArrayColumn) -> ir.Value:
+            expr_no_nulls = expr.filter(lambda x: x.notnull())
+            len = expr_no_nulls.length()
+            return cases(
+                (len.isnull(), literal(None)),
+                (len == literal(0), literal(0)),
+                else_=expr.sums(),
+            )
+
+        return self.compliant._with_callable(func)
 
     median = not_implemented()

From be000f5584f74019f95ec48e8cf1d9bf0331e374 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Mon, 1 Dec 2025 11:41:54 +0000
Subject: [PATCH 23/34] mix docstrings, add a test where there is a mismatch
 for median

---
 narwhals/expr_list.py                     | 64 ++++++++++-------------
 narwhals/series_list.py                   | 30 ++++++-----
 tests/expr_and_series/list/median_test.py | 24 +++++++--
 3 files changed, 64 insertions(+), 54 deletions(-)

diff --git a/narwhals/expr_list.py b/narwhals/expr_list.py
index 94e4c4f38d..dcc54d4404 100644
--- a/narwhals/expr_list.py
+++ b/narwhals/expr_list.py
@@ -148,22 +148,20 @@ def min(self) -> ExprT:
         """Compute the min value of the lists in the array.
 
         Examples:
-            >>> import polars as pl
+            >>> import duckdb
             >>> import narwhals as nw
-            >>> df_native = pl.DataFrame({"a": [[1], [3, 4, None]]})
+            >>> df_native = duckdb.sql("SELECT * FROM VALUES ([1]), ([3, 4, NULL]) df(a)")
             >>> df = nw.from_native(df_native)
             >>> df.with_columns(a_min=nw.col("a").list.min())
             ┌────────────────────────┐
-            |   Narwhals DataFrame   |
+            |   Narwhals LazyFrame   |
             |------------------------|
-            |shape: (2, 2)           |
             |┌──────────────┬───────┐|
-            |│ a            ┆ a_min │|
-            |│ ---          ┆ ---   │|
-            |│ list[i64]    ┆ i64   │|
-            |╞══════════════╪═══════╡|
-            |│ [1]          ┆ 1     │|
-            |│ [3, 4, null] ┆ 3     │|
+            |│      a       │ a_min │|
+            |│   int32[]    │ int32 │|
+            |├──────────────┼───────┤|
+            |│ [1]          │     1 │|
+            |│ [3, 4, NULL] │     3 │|
             |└──────────────┴───────┘|
             └────────────────────────┘
         """
@@ -198,24 +196,22 @@ def mean(self) -> ExprT:
         """Compute the mean value of the lists in the array.
 
         Examples:
-            >>> import polars as pl
+            >>> import pyarrow as pa
             >>> import narwhals as nw
-            >>> df_native = pl.DataFrame({"a": [[1], [3, 4, None]]})
+            >>> df_native = pa.table({"a": [[1], [3, 4, None]]})
             >>> df = nw.from_native(df_native)
             >>> df.with_columns(a_mean=nw.col("a").list.mean())
-            ┌─────────────────────────┐
-            |   Narwhals DataFrame    |
-            |-------------------------|
-            |shape: (2, 2)            |
-            |┌──────────────┬────────┐|
-            |│ a            ┆ a_mean │|
-            |│ ---          ┆ ---    │|
-            |│ list[i64]    ┆ f64    │|
-            |╞══════════════╪════════╡|
-            |│ [1]          ┆ 1.0    │|
-            |│ [3, 4, null] ┆ 3.5    │|
-            |└──────────────┴────────┘|
-            └─────────────────────────┘
+            ┌──────────────────────┐
+            |  Narwhals DataFrame  |
+            |----------------------|
+            |pyarrow.Table         |
+            |a: list<item: int64>  |
+            |  child 0, item: int64|
+            |a_mean: double        |
+            |----                  |
+            |a: [[[1],[3,4,null]]] |
+            |a_mean: [[1,3.5]]     |
+            └──────────────────────┘
         """
         return self._expr._append_node(ExprNode(ExprKind.ELEMENTWISE, "list.mean"))
 
@@ -223,22 +219,20 @@ def median(self) -> ExprT:
         """Compute the median value of the lists in the array.
 
         Examples:
-            >>> import polars as pl
+            >>> import duckdb
             >>> import narwhals as nw
-            >>> df_native = pl.DataFrame({"a": [[1], [3, 4, None]]})
+            >>> df_native = duckdb.sql("SELECT * FROM VALUES ([1]), ([3, 4, NULL]) df(a)")
             >>> df = nw.from_native(df_native)
             >>> df.with_columns(a_median=nw.col("a").list.median())
             ┌───────────────────────────┐
-            |    Narwhals DataFrame     |
+            |    Narwhals LazyFrame     |
             |---------------------------|
-            |shape: (2, 2)              |
             |┌──────────────┬──────────┐|
-            |│ a            ┆ a_median │|
-            |│ ---          ┆ ---      │|
-            |│ list[i64]    ┆ f64      │|
-            |╞══════════════╪══════════╡|
-            |│ [1]          ┆ 1.0      │|
-            |│ [3, 4, null] ┆ 3.5      │|
+            |│      a       │ a_median │|
+            |│   int32[]    │  double  │|
+            |├──────────────┼──────────┤|
+            |│ [1]          │      1.0 │|
+            |│ [3, 4, NULL] │      3.5 │|
             |└──────────────┴──────────┘|
             └───────────────────────────┘
         """
diff --git a/narwhals/series_list.py b/narwhals/series_list.py
index a771ff1c59..d5ead244e1 100644
--- a/narwhals/series_list.py
+++ b/narwhals/series_list.py
@@ -142,16 +142,17 @@ def max(self) -> SeriesT:
         """Compute the max value of the lists in the array.
 
         Examples:
-            >>> import polars as pl
+            >>> import pyarrow as pa
             >>> import narwhals as nw
-            >>> s_native = pl.Series([[1], [3, 4, None]])
+            >>> s_native = pa.chunked_array([[[1], [3, 4, None]]])
             >>> s = nw.from_native(s_native, series_only=True)
-            >>> s.list.max().to_native()  # doctest: +NORMALIZE_WHITESPACE
-            shape: (2,)
-            Series: '' [i64]
+            >>> s.list.max().to_native()  # doctest: +ELLIPSIS
+            <pyarrow.lib.ChunkedArray object at ...>
             [
-                    1
-                    4
+              [
+                1,
+                4
+              ]
             ]
         """
         return self._narwhals_series._with_compliant(
@@ -182,16 +183,17 @@ def median(self) -> SeriesT:
         """Compute the median value of the lists in the array.
 
         Examples:
-            >>> import polars as pl
+            >>> import pyarrow as pa
             >>> import narwhals as nw
-            >>> s_native = pl.Series([[1], [3, 4, None]])
+            >>> s_native = pa.chunked_array([[[1], [3, 4, None]]])
             >>> s = nw.from_native(s_native, series_only=True)
-            >>> s.list.median().to_native()  # doctest: +NORMALIZE_WHITESPACE
-            shape: (2,)
-            Series: '' [f64]
+            >>> s.list.median().to_native()  # doctest: +ELLIPSIS
+            <pyarrow.lib.ChunkedArray object at ...>
             [
-                    1.0
-                    3.5
+              [
+                1,
+                3
+              ]
             ]
         """
         return self._narwhals_series._with_compliant(
diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py
index e2e0003c94..fc6f6fbad1 100644
--- a/tests/expr_and_series/list/median_test.py
+++ b/tests/expr_and_series/list/median_test.py
@@ -11,11 +11,11 @@
 if TYPE_CHECKING:
     from tests.utils import Constructor, ConstructorEager
 
-data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]}
+data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], [], [3, 4, None]]}
 
 
 @pytest.mark.parametrize(
-    ("index", "expected"), [(0, 2.5), (1, -1), (2, None), (3, None), (4, None)]
+    ("index", "expected"), [(0, 2.5), (1, -1), (2, None), (3, None), (4, None), (5, 3.5)]
 )
 def test_median_expr(
     request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: float
@@ -39,11 +39,18 @@ def test_median_expr(
         .collect()["a"]
         .to_list()
     )
-    assert_equal_data({"a": [result[index]]}, {"a": [expected]})
+    if any(
+        backend in str(constructor)
+        for backend in ("pandas", "pyarrow", "pandas[pyarrow]")
+    ) and (index == 5):
+        # there is a mismatch as pyarrow uses an approximate median
+        assert_equal_data({"a": [result[index]]}, {"a": [3]})
+    else:
+        assert_equal_data({"a": [result[index]]}, {"a": [expected]})
 
 
 @pytest.mark.parametrize(
-    ("index", "expected"), [(0, 2.5), (1, -1), (2, None), (3, None), (4, None)]
+    ("index", "expected"), [(0, 2.5), (1, -1), (2, None), (3, None), (4, None), (5, 3.5)]
 )
 def test_median_series(
     request: pytest.FixtureRequest,
@@ -61,4 +68,11 @@ def test_median_series(
         pytest.importorskip("pyarrow")
     df = nw.from_native(constructor_eager(data), eager_only=True)
     result = df["a"].cast(nw.List(nw.Int32())).list.median().to_list()
-    assert_equal_data({"a": [result[index]]}, {"a": [expected]})
+    if any(
+        backend in str(constructor_eager)
+        for backend in ("pandas", "pyarrow", "pandas[pyarrow]")
+    ) and (index == 5):
+        # there is a mismatch as pyarrow uses an approximate median
+        assert_equal_data({"a": [result[index]]}, {"a": [3]})
+    else:
+        assert_equal_data({"a": [result[index]]}, {"a": [expected]})

From 9b53f03f598afa9e5cf1d1eab2b224a65e7009e1 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Mon, 1 Dec 2025 13:37:58 +0000
Subject: [PATCH 24/34] xfail median for pyarrow and python below 3.10

---
 narwhals/_arrow/utils.py                  |  8 +++++++-
 tests/expr_and_series/list/median_test.py | 12 ++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py
index 90204efcef..44e799f978 100644
--- a/narwhals/_arrow/utils.py
+++ b/narwhals/_arrow/utils.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import sys
 from functools import lru_cache
 from typing import TYPE_CHECKING, Any, cast
 
@@ -501,6 +502,9 @@ def list_agg(
     array: ChunkedArrayAny,
     func: Literal["min", "max", "mean", "approximate_median", "sum"],
 ) -> ChunkedArrayAny:
+    if func == "approximate_median" and sys.version_info < (3, 10):
+        msg = f"The minimum supported Python version for {func}"
+        raise NotImplementedError(msg)
     agg = pa.array(
         pa.Table.from_arrays(
             [pc.list_flatten(array), pc.list_parent_indices(array)],
@@ -516,7 +520,9 @@ def list_agg(
         agg = agg.fill_null(lit(0))  # pyright:ignore[reportArgumentType]
         base_array = pc.if_else(non_empty_mask.is_null(), None, 0)
     else:
-        base_array = pc.if_else(non_empty_mask, 0, None)
+        base_array = pc.if_else(
+            non_empty_mask, 0, None
+        )  # zero is just a placeholder which is replaced below
     return pa.chunked_array(
         [
             pc.replace_with_mask(
diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py
index fc6f6fbad1..ce8db05d89 100644
--- a/tests/expr_and_series/list/median_test.py
+++ b/tests/expr_and_series/list/median_test.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import os
+import sys
 from typing import TYPE_CHECKING
 
 import pytest
@@ -26,12 +27,18 @@ def test_median_expr(
         # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
         # ibis issue: https://github.com/ibis-project/ibis/issues/11788
         request.applymarker(pytest.mark.xfail)
+    if any(
+        backend in str(constructor)
+        for backend in ("pandas", "pyarrow", "pandas[pyarrow]")
+    ) and (sys.version_info < (3, 10)):
+        request.applymarker(pytest.mark.xfail)
     if os.environ.get("SPARK_CONNECT", None) and "pyspark" in str(constructor):
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor):
         if PANDAS_VERSION < (2, 2):
             pytest.skip()
         pytest.importorskip("pyarrow")
+
     result = (
         nw.from_native(constructor(data))
         .select(nw.col("a").cast(nw.List(nw.Int32())).list.median())
@@ -62,6 +69,11 @@ def test_median_series(
         "polars" in str(constructor_eager) and POLARS_VERSION < (0, 20, 7)
     ):
         request.applymarker(pytest.mark.xfail)
+    if any(
+        backend in str(constructor_eager)
+        for backend in ("pandas", "pyarrow", "pandas[pyarrow]")
+    ) and (sys.version_info < (3, 10)):
+        request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor_eager):
         if PANDAS_VERSION < (2, 2):
             pytest.skip()

From a5495766077953459893597857ac175d8b09d5b6 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Mon, 1 Dec 2025 13:45:07 +0000
Subject: [PATCH 25/34] add no cover

---
 narwhals/_arrow/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py
index 44e799f978..f4ebc57cd1 100644
--- a/narwhals/_arrow/utils.py
+++ b/narwhals/_arrow/utils.py
@@ -502,7 +502,7 @@ def list_agg(
     array: ChunkedArrayAny,
     func: Literal["min", "max", "mean", "approximate_median", "sum"],
 ) -> ChunkedArrayAny:
-    if func == "approximate_median" and sys.version_info < (3, 10):
+    if func == "approximate_median" and sys.version_info < (3, 10):  # pragma: no cover
         msg = f"The minimum supported Python version for {func}"
         raise NotImplementedError(msg)
     agg = pa.array(

From 76c70ff4efe5b62433e7434a6948d52f863288a2 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Mon, 1 Dec 2025 20:42:51 +0000
Subject: [PATCH 26/34] update the error msg

---
 narwhals/_arrow/utils.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py
index f4ebc57cd1..add6241147 100644
--- a/narwhals/_arrow/utils.py
+++ b/narwhals/_arrow/utils.py
@@ -502,8 +502,12 @@ def list_agg(
     array: ChunkedArrayAny,
     func: Literal["min", "max", "mean", "approximate_median", "sum"],
 ) -> ChunkedArrayAny:
-    if func == "approximate_median" and sys.version_info < (3, 10):  # pragma: no cover
-        msg = f"The minimum supported Python version for {func}"
+    version = sys.version_info
+    if func == "approximate_median" and version < (3, 10):  # pragma: no cover
+        msg = (
+            f"The minimum supported Python version for {func} is 3.10."
+            f"\nGot: {version.major}.{version.minor}.{version.micro}."
+        )
         raise NotImplementedError(msg)
     agg = pa.array(
         pa.Table.from_arrays(

From 54d7041b52d8750d43add10090eb1e653a97ef76 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Tue, 9 Dec 2025 11:13:33 +0000
Subject: [PATCH 27/34] Update narwhals/_spark_like/expr_list.py

Co-authored-by: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com>
---
 narwhals/_spark_like/expr_list.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/narwhals/_spark_like/expr_list.py b/narwhals/_spark_like/expr_list.py
index 15f566131c..1b2ec723bc 100644
--- a/narwhals/_spark_like/expr_list.py
+++ b/narwhals/_spark_like/expr_list.py
@@ -74,7 +74,7 @@ def func(expr: Column) -> Column:  # pragma: no cover
             size = F.array_size(sorted_expr)
             mid_index = (size / 2).cast("int")
             odd_case = sorted_expr[mid_index]
-            even_case = (sorted_expr[mid_index] - 1 + sorted_expr[mid_index]) / 2
+            even_case = (sorted_expr[mid_index - 1] + sorted_expr[mid_index]) / 2
             return (
                 F.when((size.isNull()) | (size == 0), F.lit(None))
                 .when(size % 2 == 1, odd_case)

From 47f5a4be41a987503247ac21663d6b6bba7cbc8c Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Tue, 9 Dec 2025 11:16:56 +0000
Subject: [PATCH 28/34] remove a minimum 3.10 python version

---
 narwhals/_arrow/utils.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py
index add6241147..169622d63d 100644
--- a/narwhals/_arrow/utils.py
+++ b/narwhals/_arrow/utils.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import sys
 from functools import lru_cache
 from typing import TYPE_CHECKING, Any, cast
 
@@ -502,13 +501,6 @@ def list_agg(
     array: ChunkedArrayAny,
     func: Literal["min", "max", "mean", "approximate_median", "sum"],
 ) -> ChunkedArrayAny:
-    version = sys.version_info
-    if func == "approximate_median" and version < (3, 10):  # pragma: no cover
-        msg = (
-            f"The minimum supported Python version for {func} is 3.10."
-            f"\nGot: {version.major}.{version.minor}.{version.micro}."
-        )
-        raise NotImplementedError(msg)
     agg = pa.array(
         pa.Table.from_arrays(
             [pc.list_flatten(array), pc.list_parent_indices(array)],

From 9bcdebef6c20556054e37beb1bb4d1e13481d907 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Tue, 9 Dec 2025 11:23:13 +0000
Subject: [PATCH 29/34] remove xfail from tests

---
 tests/expr_and_series/list/median_test.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py
index ce8db05d89..0d5e52666a 100644
--- a/tests/expr_and_series/list/median_test.py
+++ b/tests/expr_and_series/list/median_test.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import os
-import sys
 from typing import TYPE_CHECKING
 
 import pytest
@@ -27,11 +26,6 @@ def test_median_expr(
         # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
         # ibis issue: https://github.com/ibis-project/ibis/issues/11788
         request.applymarker(pytest.mark.xfail)
-    if any(
-        backend in str(constructor)
-        for backend in ("pandas", "pyarrow", "pandas[pyarrow]")
-    ) and (sys.version_info < (3, 10)):
-        request.applymarker(pytest.mark.xfail)
     if os.environ.get("SPARK_CONNECT", None) and "pyspark" in str(constructor):
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor):
@@ -69,11 +63,6 @@ def test_median_series(
         "polars" in str(constructor_eager) and POLARS_VERSION < (0, 20, 7)
     ):
         request.applymarker(pytest.mark.xfail)
-    if any(
-        backend in str(constructor_eager)
-        for backend in ("pandas", "pyarrow", "pandas[pyarrow]")
-    ) and (sys.version_info < (3, 10)):
-        request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor_eager):
         if PANDAS_VERSION < (2, 2):
             pytest.skip()

From 3ab7639c49858b0e720bab8b3741b1cac30ac55e Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Fri, 12 Dec 2025 10:02:20 +0000
Subject: [PATCH 30/34] skip median tests on Windows with old Python

---
 tests/expr_and_series/list/median_test.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py
index 0d5e52666a..173eac7087 100644
--- a/tests/expr_and_series/list/median_test.py
+++ b/tests/expr_and_series/list/median_test.py
@@ -1,12 +1,13 @@
 from __future__ import annotations
 
 import os
+import sys
 from typing import TYPE_CHECKING
 
 import pytest
 
 import narwhals as nw
-from tests.utils import PANDAS_VERSION, POLARS_VERSION, assert_equal_data
+from tests.utils import PANDAS_VERSION, POLARS_VERSION, assert_equal_data, is_windows
 
 if TYPE_CHECKING:
     from tests.utils import Constructor, ConstructorEager
@@ -32,7 +33,13 @@ def test_median_expr(
         if PANDAS_VERSION < (2, 2):
             pytest.skip()
         pytest.importorskip("pyarrow")
-
+    if (
+        any(backend in str(constructor) for backend in ("pandas", "pyarrow"))
+        and sys.version_info < (3, 10)
+        and is_windows
+    ):
+        reason = "The issue only affects old Python versions on Windows."
+        pytest.skip(reason=reason)
     result = (
         nw.from_native(constructor(data))
         .select(nw.col("a").cast(nw.List(nw.Int32())).list.median())
@@ -67,6 +74,13 @@ def test_median_series(
         if PANDAS_VERSION < (2, 2):
             pytest.skip()
         pytest.importorskip("pyarrow")
+    if (
+        any(backend in str(constructor_eager) for backend in ("pandas", "pyarrow"))
+        and sys.version_info < (3, 10)
+        and is_windows
+    ):
+        reason = "The issue only affects old Python versions on Windows."
+        pytest.skip(reason=reason)
     df = nw.from_native(constructor_eager(data), eager_only=True)
     result = df["a"].cast(nw.List(nw.Int32())).list.median().to_list()
     if any(

From 687c4ae62c25f0b9b2f8ec7c8e31f0b5bce392f8 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Fri, 12 Dec 2025 10:11:28 +0000
Subject: [PATCH 31/34] add no cover

---
 tests/expr_and_series/list/median_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py
index 173eac7087..801312a673 100644
--- a/tests/expr_and_series/list/median_test.py
+++ b/tests/expr_and_series/list/median_test.py
@@ -37,7 +37,7 @@ def test_median_expr(
         any(backend in str(constructor) for backend in ("pandas", "pyarrow"))
         and sys.version_info < (3, 10)
         and is_windows
-    ):
+    ):  # pragma: no cover
         reason = "The issue only affects old Python versions on Windows."
         pytest.skip(reason=reason)
     result = (
@@ -78,7 +78,7 @@ def test_median_series(
         any(backend in str(constructor_eager) for backend in ("pandas", "pyarrow"))
         and sys.version_info < (3, 10)
         and is_windows
-    ):
+    ):  # pragma: no cover
         reason = "The issue only affects old Python versions on Windows."
         pytest.skip(reason=reason)
     df = nw.from_native(constructor_eager(data), eager_only=True)

From c851f10c70c5c26152052373fdd15fb303a7705c Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Sat, 13 Dec 2025 13:45:44 +0000
Subject: [PATCH 32/34] modify list_agg as suggested

---
 narwhals/_arrow/utils.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py
index 169622d63d..c46d400658 100644
--- a/narwhals/_arrow/utils.py
+++ b/narwhals/_arrow/utils.py
@@ -501,28 +501,32 @@ def list_agg(
     array: ChunkedArrayAny,
     func: Literal["min", "max", "mean", "approximate_median", "sum"],
 ) -> ChunkedArrayAny:
+    lit_: Incomplete = lit
+    aggregation = (
+        ("values", "sum", pc.ScalarAggregateOptions(min_count=0))
+        if func == "sum"
+        else ("values", func)
+    )
     agg = pa.array(
         pa.Table.from_arrays(
             [pc.list_flatten(array), pc.list_parent_indices(array)],
             names=["values", "offsets"],
         )
         .group_by("offsets")
-        .aggregate([("values", func)])
+        .aggregate([aggregation])
         .sort_by("offsets")
         .column(f"values_{func}")
     )
     non_empty_mask = pa.array(pc.not_equal(pc.list_value_length(array), lit(0)))
     if func == "sum":
-        agg = agg.fill_null(lit(0))  # pyright:ignore[reportArgumentType]
+        # Make sure sum of empty list is 0.
         base_array = pc.if_else(non_empty_mask.is_null(), None, 0)
     else:
-        base_array = pc.if_else(
-            non_empty_mask, 0, None
-        )  # zero is just a placeholder which is replaced below
+        base_array = pa.repeat(lit_(None, type=agg.type), len(array))
     return pa.chunked_array(
         [
             pc.replace_with_mask(
-                base_array.cast(agg.type),
+                base_array,
                 non_empty_mask.fill_null(False),  # type: ignore[arg-type]
                 agg,
             )

From 310daa65e1dfa6a252e108f8c441bfe8a9bdd6db Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Sat, 13 Dec 2025 14:11:09 +0000
Subject: [PATCH 33/34] simplify tests

---
 tests/expr_and_series/list/max_test.py    | 30 +++++------------
 tests/expr_and_series/list/mean_test.py   | 30 +++++------------
 tests/expr_and_series/list/median_test.py | 39 ++++++++---------------
 tests/expr_and_series/list/min_test.py    | 28 ++++------------
 tests/expr_and_series/list/sum_test.py    | 30 +++++------------
 5 files changed, 44 insertions(+), 113 deletions(-)

diff --git a/tests/expr_and_series/list/max_test.py b/tests/expr_and_series/list/max_test.py
index 6b79c75c5e..f3cd5db5a1 100644
--- a/tests/expr_and_series/list/max_test.py
+++ b/tests/expr_and_series/list/max_test.py
@@ -11,14 +11,10 @@
     from tests.utils import Constructor, ConstructorEager
 
 data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]}
+expected = [4, -1, None, None, None]
 
 
-@pytest.mark.parametrize(
-    ("index", "expected"), [(0, 4), (1, -1), (2, None), (3, None), (4, None)]
-)
-def test_max_expr(
-    request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: int
-) -> None:
+def test_max_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
     if any(backend in str(constructor) for backend in ("dask", "cudf", "sqlframe")):
         # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
         request.applymarker(pytest.mark.xfail)
@@ -26,24 +22,14 @@ def test_max_expr(
         if PANDAS_VERSION < (2, 2):
             pytest.skip()
         pytest.importorskip("pyarrow")
-    result = (
-        nw.from_native(constructor(data))
-        .select(nw.col("a").cast(nw.List(nw.Int32())).list.max())
-        .lazy()
-        .collect()["a"]
-        .to_list()
+    result = nw.from_native(constructor(data)).select(
+        nw.col("a").cast(nw.List(nw.Int32())).list.max()
     )
-    assert_equal_data({"a": [result[index]]}, {"a": [expected]})
+    assert_equal_data(result, {"a": expected})
 
 
-@pytest.mark.parametrize(
-    ("index", "expected"), [(0, 4), (1, -1), (2, None), (3, None), (4, None)]
-)
 def test_max_series(
-    request: pytest.FixtureRequest,
-    constructor_eager: ConstructorEager,
-    index: int,
-    expected: int,
+    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
 ) -> None:
     if any(backend in str(constructor_eager) for backend in ("cudf",)):
         request.applymarker(pytest.mark.xfail)
@@ -52,5 +38,5 @@ def test_max_series(
             pytest.skip()
         pytest.importorskip("pyarrow")
     df = nw.from_native(constructor_eager(data), eager_only=True)
-    result = df["a"].cast(nw.List(nw.Int32())).list.max().to_list()
-    assert_equal_data({"a": [result[index]]}, {"a": [expected]})
+    result = df["a"].cast(nw.List(nw.Int32())).list.max()
+    assert_equal_data({"a": result}, {"a": expected})
diff --git a/tests/expr_and_series/list/mean_test.py b/tests/expr_and_series/list/mean_test.py
index d2c42be932..9ff5984b2e 100644
--- a/tests/expr_and_series/list/mean_test.py
+++ b/tests/expr_and_series/list/mean_test.py
@@ -11,14 +11,10 @@
     from tests.utils import Constructor, ConstructorEager
 
 data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]}
+expected = [2.75, -1, None, None, None]
 
 
-@pytest.mark.parametrize(
-    ("index", "expected"), [(0, 2.75), (1, -1), (2, None), (3, None), (4, None)]
-)
-def test_mean_expr(
-    request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: float
-) -> None:
+def test_mean_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
     if any(backend in str(constructor) for backend in ("dask", "cudf", "sqlframe")):
         # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
         request.applymarker(pytest.mark.xfail)
@@ -26,24 +22,14 @@ def test_mean_expr(
         if PANDAS_VERSION < (2, 2):
             pytest.skip()
         pytest.importorskip("pyarrow")
-    result = (
-        nw.from_native(constructor(data))
-        .select(nw.col("a").cast(nw.List(nw.Int32())).list.mean())
-        .lazy()
-        .collect()["a"]
-        .to_list()
+    result = nw.from_native(constructor(data)).select(
+        nw.col("a").cast(nw.List(nw.Int32())).list.mean()
     )
-    assert_equal_data({"a": [result[index]]}, {"a": [expected]})
+    assert_equal_data(result, {"a": expected})
 
 
-@pytest.mark.parametrize(
-    ("index", "expected"), [(0, 2.75), (1, -1), (2, None), (3, None), (4, None)]
-)
 def test_mean_series(
-    request: pytest.FixtureRequest,
-    constructor_eager: ConstructorEager,
-    index: int,
-    expected: float,
+    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
 ) -> None:
     if any(backend in str(constructor_eager) for backend in ("cudf",)):
         request.applymarker(pytest.mark.xfail)
@@ -52,5 +38,5 @@ def test_mean_series(
             pytest.skip()
         pytest.importorskip("pyarrow")
     df = nw.from_native(constructor_eager(data), eager_only=True)
-    result = df["a"].cast(nw.List(nw.Int32())).list.mean().to_list()
-    assert_equal_data({"a": [result[index]]}, {"a": [expected]})
+    result = df["a"].cast(nw.List(nw.Int32())).list.mean()
+    assert_equal_data({"a": result}, {"a": expected})
diff --git a/tests/expr_and_series/list/median_test.py b/tests/expr_and_series/list/median_test.py
index 801312a673..b1baa242d7 100644
--- a/tests/expr_and_series/list/median_test.py
+++ b/tests/expr_and_series/list/median_test.py
@@ -13,14 +13,11 @@
     from tests.utils import Constructor, ConstructorEager
 
 data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], [], [3, 4, None]]}
+expected = [2.5, -1, None, None, None, 3.5]
+expected_pyarrow = [2.5, -1, None, None, None, 3]
 
 
-@pytest.mark.parametrize(
-    ("index", "expected"), [(0, 2.5), (1, -1), (2, None), (3, None), (4, None), (5, 3.5)]
-)
-def test_median_expr(
-    request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: float
-) -> None:
+def test_median_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
     if any(
         backend in str(constructor) for backend in ("dask", "cudf", "sqlframe", "ibis")
     ) or ("polars" in str(constructor) and POLARS_VERSION < (0, 20, 7)):
@@ -40,31 +37,21 @@ def test_median_expr(
     ):  # pragma: no cover
         reason = "The issue only affects old Python versions on Windows."
         pytest.skip(reason=reason)
-    result = (
-        nw.from_native(constructor(data))
-        .select(nw.col("a").cast(nw.List(nw.Int32())).list.median())
-        .lazy()
-        .collect()["a"]
-        .to_list()
+    result = nw.from_native(constructor(data)).select(
+        nw.col("a").cast(nw.List(nw.Int32())).list.median()
     )
     if any(
         backend in str(constructor)
         for backend in ("pandas", "pyarrow", "pandas[pyarrow]")
-    ) and (index == 5):
+    ):
         # there is a mismatch as pyarrow uses an approximate median
-        assert_equal_data({"a": [result[index]]}, {"a": [3]})
+        assert_equal_data(result, {"a": expected_pyarrow})
     else:
-        assert_equal_data({"a": [result[index]]}, {"a": [expected]})
+        assert_equal_data(result, {"a": expected})
 
 
-@pytest.mark.parametrize(
-    ("index", "expected"), [(0, 2.5), (1, -1), (2, None), (3, None), (4, None), (5, 3.5)]
-)
 def test_median_series(
-    request: pytest.FixtureRequest,
-    constructor_eager: ConstructorEager,
-    index: int,
-    expected: float,
+    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
 ) -> None:
     if any(backend in str(constructor_eager) for backend in ("cudf",)) or (
         "polars" in str(constructor_eager) and POLARS_VERSION < (0, 20, 7)
@@ -82,12 +69,12 @@ def test_median_series(
         reason = "The issue only affects old Python versions on Windows."
         pytest.skip(reason=reason)
     df = nw.from_native(constructor_eager(data), eager_only=True)
-    result = df["a"].cast(nw.List(nw.Int32())).list.median().to_list()
+    result = df["a"].cast(nw.List(nw.Int32())).list.median()
     if any(
         backend in str(constructor_eager)
         for backend in ("pandas", "pyarrow", "pandas[pyarrow]")
-    ) and (index == 5):
+    ):
         # there is a mismatch as pyarrow uses an approximate median
-        assert_equal_data({"a": [result[index]]}, {"a": [3]})
+        assert_equal_data({"a": result}, {"a": expected_pyarrow})
     else:
-        assert_equal_data({"a": [result[index]]}, {"a": [expected]})
+        assert_equal_data({"a": result}, {"a": expected})
diff --git a/tests/expr_and_series/list/min_test.py b/tests/expr_and_series/list/min_test.py
index 87d3fe1ca7..2039f7de56 100644
--- a/tests/expr_and_series/list/min_test.py
+++ b/tests/expr_and_series/list/min_test.py
@@ -11,38 +11,24 @@
     from tests.utils import Constructor, ConstructorEager
 
 data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]}
+expected = [2, -1, None, None, None]
 
 
-@pytest.mark.parametrize(
-    ("index", "expected"), [(0, 2), (1, -1), (2, None), (3, None), (4, None)]
-)
-def test_min_expr(
-    request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: int
-) -> None:
+def test_min_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
     if any(backend in str(constructor) for backend in ("dask", "cudf")):
         request.applymarker(pytest.mark.xfail)
     if "pandas" in str(constructor):
         if PANDAS_VERSION < (2, 2):
             pytest.skip()
         pytest.importorskip("pyarrow")
-    result = (
-        nw.from_native(constructor(data))
-        .select(nw.col("a").cast(nw.List(nw.Int32())).list.min())
-        .lazy()
-        .collect()["a"]
-        .to_list()
+    result = nw.from_native(constructor(data)).select(
+        nw.col("a").cast(nw.List(nw.Int32())).list.min()
     )
-    assert_equal_data({"a": [result[index]]}, {"a": [expected]})
+    assert_equal_data(result, {"a": expected})
 
 
-@pytest.mark.parametrize(
-    ("index", "expected"), [(0, 2), (1, -1), (2, None), (3, None), (4, None)]
-)
 def test_min_series(
-    request: pytest.FixtureRequest,
-    constructor_eager: ConstructorEager,
-    index: int,
-    expected: int,
+    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
 ) -> None:
     if any(backend in str(constructor_eager) for backend in ("cudf",)):
         request.applymarker(pytest.mark.xfail)
@@ -52,4 +38,4 @@ def test_min_series(
         pytest.importorskip("pyarrow")
     df = nw.from_native(constructor_eager(data), eager_only=True)
     result = df["a"].cast(nw.List(nw.Int32())).list.min().to_list()
-    assert_equal_data({"a": [result[index]]}, {"a": [expected]})
+    assert_equal_data({"a": result}, {"a": expected})
diff --git a/tests/expr_and_series/list/sum_test.py b/tests/expr_and_series/list/sum_test.py
index 90b78bac58..1f0ff7729e 100644
--- a/tests/expr_and_series/list/sum_test.py
+++ b/tests/expr_and_series/list/sum_test.py
@@ -11,14 +11,10 @@
     from tests.utils import Constructor, ConstructorEager
 
 data = {"a": [[3, None, 2, 2, 4, None], [-1], None, [None, None, None], []]}
+expected = [11, -1, None, 0, 0]
 
 
-@pytest.mark.parametrize(
-    ("index", "expected"), [(0, 11), (1, -1), (2, None), (3, 0), (4, 0)]
-)
-def test_sum_expr(
-    request: pytest.FixtureRequest, constructor: Constructor, index: int, expected: int
-) -> None:
+def test_sum_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
     if any(backend in str(constructor) for backend in ("dask", "cudf", "sqlframe")):
         # sqlframe issue: https://github.com/eakmanrq/sqlframe/issues/548
         request.applymarker(pytest.mark.xfail)
@@ -29,24 +25,14 @@ def test_sum_expr(
     if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 2):
         reason = "version too old, duckdb 1.2 required for LambdaExpression."
         pytest.skip(reason=reason)
-    result = (
-        nw.from_native(constructor(data))
-        .select(nw.col("a").cast(nw.List(nw.Int32())).list.sum())
-        .lazy()
-        .collect()["a"]
-        .to_list()
+    result = nw.from_native(constructor(data)).select(
+        nw.col("a").cast(nw.List(nw.Int32())).list.sum()
     )
-    assert_equal_data({"a": [result[index]]}, {"a": [expected]})
+    assert_equal_data(result, {"a": expected})
 
 
-@pytest.mark.parametrize(
-    ("index", "expected"), [(0, 11), (1, -1), (2, None), (3, 0), (4, 0)]
-)
 def test_sum_series(
-    request: pytest.FixtureRequest,
-    constructor_eager: ConstructorEager,
-    index: int,
-    expected: int,
+    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
 ) -> None:
     if any(backend in str(constructor_eager) for backend in ("cudf",)):
         request.applymarker(pytest.mark.xfail)
@@ -55,5 +41,5 @@ def test_sum_series(
             pytest.skip()
         pytest.importorskip("pyarrow")
     df = nw.from_native(constructor_eager(data), eager_only=True)
-    result = df["a"].cast(nw.List(nw.Int32())).list.sum().to_list()
-    assert_equal_data({"a": [result[index]]}, {"a": [expected]})
+    result = df["a"].cast(nw.List(nw.Int32())).list.sum()
+    assert_equal_data({"a": result}, {"a": expected})

From 398c3509ba7e64b146a696c0b891f756d6394355 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Sat, 13 Dec 2025 14:22:03 +0000
Subject: [PATCH 34/34] fix typing

---
 narwhals/_arrow/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py
index c46d400658..dbd8aa6c62 100644
--- a/narwhals/_arrow/utils.py
+++ b/narwhals/_arrow/utils.py
@@ -503,7 +503,7 @@ def list_agg(
 ) -> ChunkedArrayAny:
     lit_: Incomplete = lit
     aggregation = (
-        ("values", "sum", pc.ScalarAggregateOptions(min_count=0))
+        ("values", func, pc.ScalarAggregateOptions(min_count=0))
         if func == "sum"
         else ("values", func)
     )