diff --git a/docs/api-reference/expr_struct.md b/docs/api-reference/expr_struct.md new file mode 100644 index 0000000000..bfc093ed8a --- /dev/null +++ b/docs/api-reference/expr_struct.md @@ -0,0 +1,9 @@ +# `narwhals.Expr.struct` + +::: narwhals.expr.ExprStructNamespace + handler: python + options: + members: + - field + show_source: false + show_bases: false diff --git a/docs/api-reference/index.md b/docs/api-reference/index.md index 3c4a0f42b2..72cd448472 100644 --- a/docs/api-reference/index.md +++ b/docs/api-reference/index.md @@ -8,6 +8,7 @@ - [narwhals.Expr.list](expr_list.md) - [narwhals.Expr.name](expr_name.md) - [narwhals.Expr.str](expr_str.md) +- [narwhals.Expr.struct](expr_struct.md) - [narwhals.GroupBy](group_by.md) - [narwhals.LazyGroupBy](lazy_group_by.md) - [narwhals.LazyFrame](lazyframe.md) @@ -17,6 +18,7 @@ - [narwhals.Series.dt](series_dt.md) - [narwhals.Series.list](series_list.md) - [narwhals.Series.str](series_str.md) +- [narwhals.Series.struct](series_struct.md) - [narwhals.dependencies](dependencies.md) - [narwhals.Implementation](implementation.md) - [narwhals.dtypes](dtypes.md) diff --git a/docs/api-reference/series_struct.md b/docs/api-reference/series_struct.md new file mode 100644 index 0000000000..638376dad3 --- /dev/null +++ b/docs/api-reference/series_struct.md @@ -0,0 +1,9 @@ +# `narwhals.Series.struct` + +::: narwhals.series.SeriesStructNamespace + handler: python + options: + members: + - field + show_source: false + show_bases: false diff --git a/mkdocs.yml b/mkdocs.yml index ad774340fd..db26af50ce 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -34,11 +34,13 @@ nav: - Supported Expr.list methods: api-completeness/expr_list.md - Supported Expr.name methods: api-completeness/expr_name.md - Supported Expr.str methods: api-completeness/expr_str.md + - Supported Expr.struct methods: api-completeness/expr_struct.md - Supported Series methods: api-completeness/series.md - Supported Series.cat methods: api-completeness/series_cat.md - 
Supported Series.dt methods: api-completeness/series_dt.md - Supported Series.list methods: api-completeness/series_list.md - Supported Series.str methods: api-completeness/series_str.md + - Supported Series.struct methods: api-completeness/series_struct.md - API Reference: - api-reference/index.md - api-reference/narwhals.md @@ -49,6 +51,7 @@ nav: - api-reference/expr_list.md - api-reference/expr_name.md - api-reference/expr_str.md + - api-reference/expr_struct.md - api-reference/group_by.md - api-reference/lazy_group_by.md - api-reference/lazyframe.md @@ -58,6 +61,7 @@ nav: - api-reference/series_dt.md - api-reference/series_list.md - api-reference/series_str.md + - api-reference/series_struct.md - api-reference/dependencies.md - api-reference/implementation.md - api-reference/dtypes.md diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index 5b80861752..805c977f75 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -14,6 +14,7 @@ from narwhals._arrow.expr_list import ArrowExprListNamespace from narwhals._arrow.expr_name import ArrowExprNameNamespace from narwhals._arrow.expr_str import ArrowExprStringNamespace +from narwhals._arrow.expr_struct import ArrowExprStructNamespace from narwhals._arrow.series import ArrowSeries from narwhals._expression_parsing import ExprKind from narwhals._expression_parsing import evaluate_output_names_and_aliases @@ -635,3 +636,7 @@ def name(self: Self) -> ArrowExprNameNamespace: @property def list(self: Self) -> ArrowExprListNamespace: return ArrowExprListNamespace(self) + + @property + def struct(self: Self) -> ArrowExprStructNamespace: + return ArrowExprStructNamespace(self) diff --git a/narwhals/_arrow/expr_struct.py b/narwhals/_arrow/expr_struct.py new file mode 100644 index 0000000000..4d4f1863d2 --- /dev/null +++ b/narwhals/_arrow/expr_struct.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from narwhals._expression_parsing import 
reuse_series_namespace_implementation + +if TYPE_CHECKING: + from typing_extensions import Self + + from narwhals._arrow.expr import ArrowExpr + + +class ArrowExprStructNamespace: + def __init__(self: Self, expr: ArrowExpr) -> None: + self._compliant_expr = expr + + def field(self: Self, name: str) -> ArrowExpr: + return reuse_series_namespace_implementation( + self._compliant_expr, + "struct", + "field", + name=name, + ).alias(name) diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index de0d35337f..0aa2593ab2 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -16,6 +16,7 @@ from narwhals._arrow.series_dt import ArrowSeriesDateTimeNamespace from narwhals._arrow.series_list import ArrowSeriesListNamespace from narwhals._arrow.series_str import ArrowSeriesStringNamespace +from narwhals._arrow.series_struct import ArrowSeriesStructNamespace from narwhals._arrow.utils import cast_for_truediv from narwhals._arrow.utils import chunked_array from narwhals._arrow.utils import extract_native @@ -1213,3 +1214,7 @@ def str(self: Self) -> ArrowSeriesStringNamespace: @property def list(self: Self) -> ArrowSeriesListNamespace: return ArrowSeriesListNamespace(self) + + @property + def struct(self: Self) -> ArrowSeriesStructNamespace: + return ArrowSeriesStructNamespace(self) diff --git a/narwhals/_arrow/series_struct.py b/narwhals/_arrow/series_struct.py new file mode 100644 index 0000000000..e79147d066 --- /dev/null +++ b/narwhals/_arrow/series_struct.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pyarrow.compute as pc + +if TYPE_CHECKING: + from typing_extensions import Self + + from narwhals._arrow.series import ArrowSeries + + +class ArrowSeriesStructNamespace: + def __init__(self: Self, series: ArrowSeries) -> None: + self._compliant_series: ArrowSeries = series + + def field(self: Self, name: str) -> ArrowSeries: + return self._compliant_series._from_native_series( + 
pc.struct_field(self._compliant_series._native_series, name), + ).alias(name) diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index 1158830f8d..82ed345ce8 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -652,3 +652,4 @@ def name(self: Self) -> DaskExprNameNamespace: cat = not_implemented() # pyright: ignore[reportAssignmentType] list = not_implemented() # pyright: ignore[reportAssignmentType] + struct = not_implemented() # pyright: ignore[reportAssignmentType] diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py index c47fa8bf66..6a6104f893 100644 --- a/narwhals/_duckdb/expr.py +++ b/narwhals/_duckdb/expr.py @@ -18,6 +18,7 @@ from narwhals._duckdb.expr_list import DuckDBExprListNamespace from narwhals._duckdb.expr_name import DuckDBExprNameNamespace from narwhals._duckdb.expr_str import DuckDBExprStringNamespace +from narwhals._duckdb.expr_struct import DuckDBExprStructNamespace from narwhals._duckdb.utils import lit from narwhals._duckdb.utils import maybe_evaluate_expr from narwhals._duckdb.utils import narwhals_to_native_dtype @@ -484,6 +485,10 @@ def name(self: Self) -> DuckDBExprNameNamespace: def list(self: Self) -> DuckDBExprListNamespace: return DuckDBExprListNamespace(self) + @property + def struct(self: Self) -> DuckDBExprStructNamespace: + return DuckDBExprStructNamespace(self) + arg_min = not_implemented() arg_max = not_implemented() arg_true = not_implemented() diff --git a/narwhals/_duckdb/expr_struct.py b/narwhals/_duckdb/expr_struct.py new file mode 100644 index 0000000000..1f750e1326 --- /dev/null +++ b/narwhals/_duckdb/expr_struct.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from duckdb import FunctionExpression + +from narwhals._duckdb.utils import lit + +if TYPE_CHECKING: + from typing_extensions import Self + + from narwhals._duckdb.expr import DuckDBExpr + + +class DuckDBExprStructNamespace: + def __init__(self: Self, expr: DuckDBExpr) -> None: + 
self._compliant_expr = expr + + def field(self: Self, name: str) -> DuckDBExpr: + return self._compliant_expr._from_call( + lambda _input: FunctionExpression("struct_extract", _input, lit(name)), + "field", + ).alias(name) diff --git a/narwhals/_expression_parsing.py b/narwhals/_expression_parsing.py index 96a6f91441..2f1920e18c 100644 --- a/narwhals/_expression_parsing.py +++ b/narwhals/_expression_parsing.py @@ -220,6 +220,7 @@ def reuse_series_namespace_implementation( kwargs: keyword arguments to pass to function. """ plx = expr.__narwhals_namespace__() + return plx._create_expr_from_callable( # type: ignore[return-value] lambda df: [ getattr(getattr(series, series_namespace), attr)(**kwargs) diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index f23b199674..586a129b59 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -17,6 +17,7 @@ from narwhals._pandas_like.expr_list import PandasLikeExprListNamespace from narwhals._pandas_like.expr_name import PandasLikeExprNameNamespace from narwhals._pandas_like.expr_str import PandasLikeExprStringNamespace +from narwhals._pandas_like.expr_struct import PandasLikeExprStructNamespace from narwhals._pandas_like.group_by import AGGREGATIONS_TO_PANDAS_EQUIVALENT from narwhals._pandas_like.series import PandasLikeSeries from narwhals.dependencies import get_numpy @@ -753,3 +754,7 @@ def name(self: Self) -> PandasLikeExprNameNamespace: @property def list(self: Self) -> PandasLikeExprListNamespace: return PandasLikeExprListNamespace(self) + + @property + def struct(self: Self) -> PandasLikeExprStructNamespace: + return PandasLikeExprStructNamespace(self) diff --git a/narwhals/_pandas_like/expr_struct.py b/narwhals/_pandas_like/expr_struct.py new file mode 100644 index 0000000000..997ce1dab7 --- /dev/null +++ b/narwhals/_pandas_like/expr_struct.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from narwhals._expression_parsing 
import reuse_series_namespace_implementation + +if TYPE_CHECKING: + from typing_extensions import Self + + from narwhals._pandas_like.expr import PandasLikeExpr + + +class PandasLikeExprStructNamespace: + def __init__(self: Self, expr: PandasLikeExpr) -> None: + self._compliant_expr = expr + + def field(self, name: str) -> PandasLikeExpr: + return reuse_series_namespace_implementation( + self._compliant_expr, + "struct", + "field", + name=name, + ).alias(name) diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 7a0a0d3c16..23873bb221 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -15,6 +15,7 @@ from narwhals._pandas_like.series_dt import PandasLikeSeriesDateTimeNamespace from narwhals._pandas_like.series_list import PandasLikeSeriesListNamespace from narwhals._pandas_like.series_str import PandasLikeSeriesStringNamespace +from narwhals._pandas_like.series_struct import PandasLikeSeriesStructNamespace from narwhals._pandas_like.utils import align_and_extract_native from narwhals._pandas_like.utils import get_dtype_backend from narwhals._pandas_like.utils import narwhals_to_native_dtype @@ -1131,3 +1132,7 @@ def cat(self: Self) -> PandasLikeSeriesCatNamespace: @property def list(self: Self) -> PandasLikeSeriesListNamespace: return PandasLikeSeriesListNamespace(self) + + @property + def struct(self: Self) -> PandasLikeSeriesStructNamespace: + return PandasLikeSeriesStructNamespace(self) diff --git a/narwhals/_pandas_like/series_list.py b/narwhals/_pandas_like/series_list.py index f7142909ab..da3eea42a3 100644 --- a/narwhals/_pandas_like/series_list.py +++ b/narwhals/_pandas_like/series_list.py @@ -16,6 +16,9 @@ class PandasLikeSeriesListNamespace: def __init__(self: Self, series: PandasLikeSeries) -> None: + if not hasattr(series._native_series, "list"): + msg = "Series must be of PyArrow List type to support list namespace." 
+ raise TypeError(msg) self._compliant_series = series def len(self: Self) -> PandasLikeSeries: diff --git a/narwhals/_pandas_like/series_struct.py b/narwhals/_pandas_like/series_struct.py new file mode 100644 index 0000000000..a9d602b0f2 --- /dev/null +++ b/narwhals/_pandas_like/series_struct.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing_extensions import Self + + from narwhals._pandas_like.series import PandasLikeSeries + + +class PandasLikeSeriesStructNamespace: + def __init__(self: Self, series: PandasLikeSeries) -> None: + if not hasattr(series._native_series, "struct"): + msg = "Series must be of PyArrow Struct type to support struct namespace." + raise TypeError(msg) + self._compliant_series = series + + def field(self: Self, name: str) -> PandasLikeSeries: + return self._compliant_series._from_native_series( + self._compliant_series._native_series.struct.field(name) + ).alias(name) diff --git a/narwhals/_polars/expr.py b/narwhals/_polars/expr.py index fbb9644730..3bcd31bc83 100644 --- a/narwhals/_polars/expr.py +++ b/narwhals/_polars/expr.py @@ -316,6 +316,10 @@ def name(self: Self) -> PolarsExprNameNamespace: def list(self: Self) -> PolarsExprListNamespace: return PolarsExprListNamespace(self) + @property + def struct(self: Self) -> PolarsExprStructNamespace: + return PolarsExprStructNamespace(self) + class PolarsExprDateTimeNamespace: def __init__(self: Self, expr: PolarsExpr) -> None: @@ -401,3 +405,19 @@ def func(*args: Any, **kwargs: Any) -> PolarsExpr: ) return func + + +class PolarsExprStructNamespace: + def __init__(self: Self, expr: PolarsExpr) -> None: + self._expr = expr + + def __getattr__( + self: Self, attr: str + ) -> Callable[[Any], PolarsExpr]: # pragma: no cover + def func(*args: Any, **kwargs: Any) -> PolarsExpr: + args, kwargs = extract_args_kwargs(args, kwargs) # type: ignore[assignment] + return self._expr._from_native_expr( + 
getattr(self._expr._native_expr.struct, attr)(*args, **kwargs) + ) + + return func diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 1db4d8bbdd..fc1f1e27a8 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -580,6 +580,10 @@ def cat(self: Self) -> PolarsSeriesCatNamespace: def list(self: Self) -> PolarsSeriesListNamespace: return PolarsSeriesListNamespace(self) + @property + def struct(self: Self) -> PolarsSeriesStructNamespace: + return PolarsSeriesStructNamespace(self) + class PolarsSeriesDateTimeNamespace: def __init__(self: Self, series: PolarsSeries) -> None: @@ -650,3 +654,19 @@ def func(*args: Any, **kwargs: Any) -> Any: ) return func + + +class PolarsSeriesStructNamespace: + def __init__(self: Self, series: PolarsSeries) -> None: + self._compliant_series = series + + def __getattr__(self: Self, attr: str) -> Any: + def func(*args: Any, **kwargs: Any) -> Any: + args, kwargs = extract_args_kwargs(args, kwargs) # type: ignore[assignment] + return self._compliant_series._from_native_series( + getattr(self._compliant_series._native_series.struct, attr)( + *args, **kwargs + ) + ) + + return func diff --git a/narwhals/_spark_like/expr.py b/narwhals/_spark_like/expr.py index 32f1923ede..45a801273d 100644 --- a/narwhals/_spark_like/expr.py +++ b/narwhals/_spark_like/expr.py @@ -14,6 +14,7 @@ from narwhals._spark_like.expr_list import SparkLikeExprListNamespace from narwhals._spark_like.expr_name import SparkLikeExprNameNamespace from narwhals._spark_like.expr_str import SparkLikeExprStringNamespace +from narwhals._spark_like.expr_struct import SparkLikeExprStructNamespace from narwhals._spark_like.utils import maybe_evaluate_expr from narwhals._spark_like.utils import narwhals_to_native_dtype from narwhals.dependencies import get_pyspark @@ -617,6 +618,10 @@ def dt(self: Self) -> SparkLikeExprDateTimeNamespace: def list(self: Self) -> SparkLikeExprListNamespace: return SparkLikeExprListNamespace(self) + @property + 
def struct(self: Self) -> SparkLikeExprStructNamespace: + return SparkLikeExprStructNamespace(self) + arg_min = not_implemented() arg_max = not_implemented() arg_true = not_implemented() diff --git a/narwhals/_spark_like/expr_struct.py b/narwhals/_spark_like/expr_struct.py new file mode 100644 index 0000000000..f7bdd10e63 --- /dev/null +++ b/narwhals/_spark_like/expr_struct.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pyspark.sql import Column + from typing_extensions import Self + + from narwhals._spark_like.expr import SparkLikeExpr + + +class SparkLikeExprStructNamespace: + def __init__(self: Self, expr: SparkLikeExpr) -> None: + self._compliant_expr = expr + + def field(self: Self, name: str) -> SparkLikeExpr: + def func(_input: Column) -> Column: + return _input.getField(name) + + return self._compliant_expr._from_call(func, "field").alias(name) diff --git a/narwhals/expr.py b/narwhals/expr.py index d7aae157ec..0925b03a8b 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -20,6 +20,7 @@ from narwhals.expr_list import ExprListNamespace from narwhals.expr_name import ExprNameNamespace from narwhals.expr_str import ExprStringNamespace +from narwhals.expr_struct import ExprStructNamespace from narwhals.translate import to_native from narwhals.utils import _validate_rolling_arguments from narwhals.utils import flatten @@ -2479,6 +2480,10 @@ def name(self: Self) -> ExprNameNamespace[Self]: def list(self: Self) -> ExprListNamespace[Self]: return ExprListNamespace(self) + @property + def struct(self: Self) -> ExprStructNamespace[Self]: + return ExprStructNamespace(self) + __all__ = [ "Expr", diff --git a/narwhals/expr_struct.py b/narwhals/expr_struct.py new file mode 100644 index 0000000000..eb6f33e0fc --- /dev/null +++ b/narwhals/expr_struct.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING +from typing import Generic +from typing import TypeVar + 
+if TYPE_CHECKING: + from typing_extensions import Self + + from narwhals.expr import Expr + +ExprT = TypeVar("ExprT", bound="Expr") + + +class ExprStructNamespace(Generic[ExprT]): + def __init__(self: Self, expr: ExprT) -> None: + self._expr = expr + + def field(self: Self, name: str) -> ExprT: + r"""Retrieve a Struct field as a new expression. + + Arguments: + name: Name of the struct field to retrieve. + + Returns: + A new expression. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame( + ... { + ... "user": [ + ... {"id": "0", "name": "john"}, + ... {"id": "1", "name": "jane"}, + ... ] + ... } + ... ) + >>> df = nw.from_native(df_native) + >>> df.with_columns(name=nw.col("user").struct.field("name")) + ┌───────────────────────┐ + | Narwhals DataFrame | + |-----------------------| + |shape: (2, 2) | + |┌──────────────┬──────┐| + |│ user ┆ name │| + |│ --- ┆ --- │| + |│ struct[2] ┆ str │| + |╞══════════════╪══════╡| + |│ {"0","john"} ┆ john │| + |│ {"1","jane"} ┆ jane │| + |└──────────────┴──────┘| + └───────────────────────┘ + """ + return self._expr.__class__( + lambda plx: self._expr._to_compliant_expr(plx).struct.field(name), + self._expr._metadata, + ) diff --git a/narwhals/series.py b/narwhals/series.py index 749b5d1afa..3879ab4494 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -17,6 +17,7 @@ from narwhals.series_dt import SeriesDateTimeNamespace from narwhals.series_list import SeriesListNamespace from narwhals.series_str import SeriesStringNamespace +from narwhals.series_struct import SeriesStructNamespace from narwhals.translate import to_native from narwhals.typing import IntoSeriesT from narwhals.utils import _validate_rolling_arguments @@ -2631,3 +2632,7 @@ def cat(self: Self) -> SeriesCatNamespace[Self]: @property def list(self: Self) -> SeriesListNamespace[Self]: return SeriesListNamespace(self) + + @property + def struct(self: Self) -> SeriesStructNamespace[Self]: + return 
SeriesStructNamespace(self) diff --git a/narwhals/series_struct.py b/narwhals/series_struct.py new file mode 100644 index 0000000000..1310beaffc --- /dev/null +++ b/narwhals/series_struct.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING +from typing import Any +from typing import Generic +from typing import TypeVar + +if TYPE_CHECKING: + from typing_extensions import Self + + from narwhals.series import Series + +SeriesT = TypeVar("SeriesT", bound="Series[Any]") + + +class SeriesStructNamespace(Generic[SeriesT]): + def __init__(self: Self, series: SeriesT) -> None: + self._narwhals_series = series + + def field(self: Self, name: str) -> SeriesT: + r"""Retrieve a Struct field as a new Series. + + Arguments: + name: Name of the struct field to retrieve. + + Returns: + A new Series. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> s_native = pl.Series( + ... [ + ... {"id": "0", "name": "john"}, + ... {"id": "1", "name": "jane"}, + ... ] + ... ) + >>> s = nw.from_native(s_native, series_only=True) + >>> s.struct.field("name").to_list() + ['john', 'jane'] + """ + return self._narwhals_series._from_compliant_series( + self._narwhals_series._compliant_series.struct.field(name) + ) diff --git a/narwhals/typing.py b/narwhals/typing.py index d6bd8d3d88..f6ff238f25 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -225,6 +225,8 @@ def dt(self) -> Any: ... def cat(self) -> Any: ... @property def list(self) -> Any: ... + @property + def struct(self) -> Any: ... 
@unstable def ewm_mean( diff --git a/tests/expr_and_series/list/len_test.py b/tests/expr_and_series/list/len_test.py index 7066fc6cf3..7ce2a65e48 100644 --- a/tests/expr_and_series/list/len_test.py +++ b/tests/expr_and_series/list/len_test.py @@ -56,3 +56,15 @@ def test_pandas_preserve_index(request: pytest.FixtureRequest) -> None: result = df["a"].cast(nw.List(nw.Int32())).list.len() assert_equal_data({"a": result}, expected) assert (result.to_native().index == index).all() + + +def test_pandas_object_series() -> None: + import pandas as pd + + import narwhals as nw + + s_native = pd.Series(data=data["a"]) + s = nw.from_native(s_native, series_only=True) + + with pytest.raises(TypeError): + s.list.len() diff --git a/tests/expr_and_series/struct_/__init__.py b/tests/expr_and_series/struct_/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/expr_and_series/struct_/field_test.py b/tests/expr_and_series/struct_/field_test.py new file mode 100644 index 0000000000..2481f520b7 --- /dev/null +++ b/tests/expr_and_series/struct_/field_test.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +import pandas as pd +import pyarrow as pa +import pytest + +import narwhals.stable.v1 as nw +from tests.utils import PANDAS_VERSION +from tests.utils import Constructor +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data + + +def test_get_field_expr( + request: pytest.FixtureRequest, + constructor: Constructor, +) -> None: + if any( + backend in str(constructor) for backend in ("dask", "modin", "cudf", "sqlframe") + ): + request.applymarker(pytest.mark.xfail) + if "pandas" in str(constructor) and PANDAS_VERSION < (2, 2, 0): + pytest.skip() + data = {"user": [{"id": "0", "name": "john"}, {"id": "1", "name": "jane"}]} + + df_native = constructor(data) + + if "pandas" in str(constructor): + df_native = df_native.assign( # type: ignore[union-attr] + user=pd.Series( + data["user"], + dtype=pd.ArrowDtype( + pa.struct([("id", 
pa.string()), ("name", pa.string())]) + ), + ) + ) + + df = nw.from_native(df_native) + + result = nw.from_native(df).select( + nw.col("user").struct.field("id"), + nw.col("user").struct.field("name"), + ) + expected = {"id": ["0", "1"], "name": ["john", "jane"]} + assert_equal_data(result, expected) + result = nw.from_native(df).select( + nw.col("user").struct.field("id").name.keep(), + ) + expected = {"user": ["0", "1"]} + assert_equal_data(result, expected) + + +def test_get_field_series( + request: pytest.FixtureRequest, + constructor_eager: ConstructorEager, +) -> None: + if any(backend in str(constructor_eager) for backend in ("modin", "cudf")): + request.applymarker(pytest.mark.xfail) + if "pandas" in str(constructor_eager) and PANDAS_VERSION < (2, 2, 0): + pytest.skip() + data = {"user": [{"id": "0", "name": "john"}, {"id": "1", "name": "jane"}]} + expected = {"id": ["0", "1"], "name": ["john", "jane"]} + + _expected = expected.copy() + df_native = constructor_eager(data) + + if "pandas" in str(constructor_eager): + df_native = df_native.assign( # type: ignore[union-attr] + user=pd.Series( + data["user"], + dtype=pd.ArrowDtype( + pa.struct([("id", pa.string()), ("name", pa.string())]) + ), + ) + ) + + df = nw.from_native(df_native, eager_only=True) + + result = nw.from_native(df).select( + df["user"].struct.field("id"), + df["user"].struct.field("name"), + ) + expected = {"id": ["0", "1"], "name": ["john", "jane"]} + assert_equal_data(result, _expected) + + +def test_pandas_object_series() -> None: + s_native = pd.Series( + data=[ + {"id": "0", "name": "john"}, + {"id": "1", "name": "jane"}, + ] + ) + s = nw.from_native(s_native, series_only=True) + + with pytest.raises(TypeError): + s.struct.field("name") diff --git a/utils/check_api_reference.py b/utils/check_api_reference.py index 11bdca3c94..6a00dbfe28 100644 --- a/utils/check_api_reference.py +++ b/utils/check_api_reference.py @@ -12,7 +12,7 @@ ret = 0 -NAMESPACES = {"dt", "str", "cat", "name", "list"} 
+NAMESPACES = {"dt", "str", "cat", "name", "list", "struct"} EXPR_ONLY_METHODS = {"over", "map_batches"} SERIES_ONLY_METHODS = { "dtype", diff --git a/utils/generate_backend_completeness.py b/utils/generate_backend_completeness.py index b269a94b05..b8f2bb58f2 100644 --- a/utils/generate_backend_completeness.py +++ b/utils/generate_backend_completeness.py @@ -42,10 +42,12 @@ class Backend(NamedTuple): "expr_str", "expr_list", "expr_name", + "expr_struct", "series_dt", "series_cat", "series_str", "series_list", + "series_struct", ] BACKENDS = [