From 1bb56158125c2e152270ebbf1fd38f3079bb49b1 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 27 Mar 2025 18:43:28 +0000 Subject: [PATCH 01/12] feat(typing): Add `DictConvertible` protocol --- narwhals/_translate.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/narwhals/_translate.py b/narwhals/_translate.py index 11aa6afc40..9f4438bd8b 100644 --- a/narwhals/_translate.py +++ b/narwhals/_translate.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING from typing import Any from typing import Iterable +from typing import Mapping from typing import Protocol if TYPE_CHECKING: @@ -70,3 +71,30 @@ class FromIterable(Protocol[FromIterableT_contra]): def from_iterable( cls, data: Iterable[FromIterableT_contra], *args: Any, **kwds: Any ) -> Self: ... + + +ToDictDT_co = TypeVar( + "ToDictDT_co", bound=Mapping[str, Any], covariant=True, default=dict[str, Any] +) +FromDictDT_contra = TypeVar( + "FromDictDT_contra", + bound=Mapping[str, Any], + contravariant=True, + default=Mapping[str, Any], +) + + +class ToDict(Protocol[ToDictDT_co]): + def to_dict(self, *args: Any, **kwds: Any) -> ToDictDT_co: ... + + +class FromDict(Protocol[FromDictDT_contra]): + @classmethod + def from_dict(cls, data: FromDictDT_contra, *args: Any, **kwds: Any) -> Self: ... + + +class DictConvertible( + ToDict[ToDictDT_co], + FromDict[FromDictDT_contra], + Protocol[ToDictDT_co, FromDictDT_contra], +): ... From be8713fd31a5cf00889e14bd117a270b966d694a Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 27 Mar 2025 18:44:08 +0000 Subject: [PATCH 02/12] feat(typing): Add `CompliantFrame.from_dict` --- narwhals/_compliant/dataframe.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py index 76f4fdc333..6c35c7b972 100644 --- a/narwhals/_compliant/dataframe.py +++ b/narwhals/_compliant/dataframe.py @@ -18,6 +18,7 @@ from narwhals._compliant.typing import EagerSeriesT from narwhals._compliant.typing import NativeFrameT_co from narwhals._expression_parsing import evaluate_output_names_and_aliases +from narwhals._translate import DictConvertible from narwhals._translate import NumpyConvertible from narwhals.utils import Version from narwhals.utils import _StoresNative @@ -47,9 +48,12 @@ T = TypeVar("T") +_ToDict: TypeAlias = "dict[str, CompliantSeriesT] | dict[str, list[Any]]" # noqa: PYI047 + class CompliantDataFrame( NumpyConvertible["_2DArray", "_2DArray"], + DictConvertible["_ToDict[CompliantSeriesT]", Mapping[str, Any]], _StoresNative[NativeFrameT_co], Sized, Protocol[CompliantSeriesT, CompliantExprT_contra, NativeFrameT_co], @@ -62,6 +66,15 @@ class CompliantDataFrame( def __narwhals_dataframe__(self) -> Self: ... def __narwhals_namespace__(self) -> Any: ... @classmethod + def from_dict( + cls, + data: Mapping[str, Any], + /, + *, + context: _FullContext, + schema: Mapping[str, DType] | Schema | None, + ) -> Self: ... + @classmethod def from_numpy( cls, data: _2DArray, From d85199360238d64b13c2e0967aed1ff4b08acc6f Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 27 Mar 2025 18:51:32 +0000 Subject: [PATCH 03/12] feat: Add `PolarsDataFrame.from_dict` --- narwhals/_polars/dataframe.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/narwhals/_polars/dataframe.py b/narwhals/_polars/dataframe.py index 02c148afc0..febe537999 100644 --- a/narwhals/_polars/dataframe.py +++ b/narwhals/_polars/dataframe.py @@ -94,6 +94,23 @@ def __init__( self._version = version validate_backend_version(self._implementation, self._backend_version) + @classmethod + def from_dict( + cls, + data: Mapping[str, Any], + /, + *, + context: _FullContext, + schema: Mapping[str, DType] | Schema | None, + ) -> Self: + from narwhals.schema import Schema + + pl_schema = Schema(schema).to_polars() if schema is not None else schema + native = pl.from_dict(data, pl_schema) + return cls( + native, backend_version=context._backend_version, version=context._version + ) + @classmethod def from_numpy( cls, From 69da9d4eb4de23db90605638a46e506a1d9bc232 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 27 Mar 2025 19:03:35 +0000 Subject: [PATCH 04/12] feat: Add `ArrowDataFrame.from_dict` --- narwhals/_arrow/dataframe.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index 22650ab25f..44d419d80b 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -93,6 +93,27 @@ def __init__( self._version = version validate_backend_version(self._implementation, self._backend_version) + @classmethod + def from_dict( + cls, + data: Mapping[str, Any], + /, + *, + context: _FullContext, + schema: Mapping[str, DType] | Schema | None, + ) -> Self: + from narwhals.schema import Schema + + pa_schema = Schema(schema).to_arrow() if schema is not None else schema + # NOTE: Stubs too narrow on `data` + native = pa.table(data, schema=pa_schema) # type: ignore[arg-type] + return cls( + native, + backend_version=context._backend_version, + version=context._version, + validate_column_names=True, + ) + @classmethod def from_numpy( cls, From 434a30911c1d72defc64a74601c036335a4aee4f Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 27 Mar 2025 19:05:57 +0000 Subject: [PATCH 05/12] refactor: Use `Table.from_pydict` instead No typing issues and slightly faster route to the same call --- narwhals/_arrow/dataframe.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index 44d419d80b..6674e45853 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -105,8 +105,7 @@ def from_dict( from narwhals.schema import Schema pa_schema = Schema(schema).to_arrow() if schema is not None else schema - # NOTE: Stubs too narrow on `data` - native = pa.table(data, schema=pa_schema) # type: ignore[arg-type] + native = pa.Table.from_pydict(data, schema=pa_schema) return cls( native, backend_version=context._backend_version, From 346c25925bc967f5129ea471d5c2c262cb23148b Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 27 Mar 2025 19:17:11 +0000 Subject: [PATCH 06/12] feat: Add `PandasLikeDataFrame.from_dict` --- narwhals/_pandas_like/dataframe.py | 44 ++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 7fe858f379..71cf0ad28b 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -16,6 +16,7 @@ from narwhals._compliant import EagerDataFrame from narwhals._pandas_like.series import PANDAS_TO_NUMPY_DTYPE_MISSING from narwhals._pandas_like.series import PandasLikeSeries +from narwhals._pandas_like.utils import align_and_extract_native from narwhals._pandas_like.utils import align_series_full_broadcast from narwhals._pandas_like.utils import check_column_names_are_unique from narwhals._pandas_like.utils import convert_str_slice_to_int_slice @@ -113,6 +114,49 @@ def __init__( if validate_column_names: check_column_names_are_unique(native_dataframe.columns) + @classmethod + def from_dict( + cls, + data: Mapping[str, Any], + /, + *, + context: _FullContext, + schema: Mapping[str, DType] | Schema | None, + ) -> Self: + from narwhals.schema import Schema + from narwhals.translate import from_native + + implementation = context._implementation + ns = implementation.to_native_namespace() + Series = cast("type[pd.Series[Any]]", ns.Series) # noqa: N806 + DataFrame = cast("type[pd.DataFrame]", ns.DataFrame) # noqa: N806 + aligned_data: dict[str, pd.Series[Any] | Any] = {} + left_most: PandasLikeSeries | None = None + for name, series in data.items(): + if isinstance(series, Series): + compliant = from_native(series, series_only=True)._compliant_series + if left_most is None: + left_most = cast("PandasLikeSeries", compliant) + aligned_data[name] = series + else: + aligned_data[name] = align_and_extract_native(left_most, compliant)[1] + else: + aligned_data[name] = series + + native = DataFrame.from_dict(aligned_data) + if schema: + it: Iterable[DTypeBackend] = ( + get_dtype_backend(dtype, implementation) for dtype in native.dtypes + ) + native = native.astype(Schema(schema).to_pandas(it)) + return cls( + native, + implementation=implementation, + backend_version=context._backend_version, + version=context._version, + validate_column_names=True, + ) + @classmethod def from_numpy( cls, From 682439ef7bc8386a5e890e601b4343a7b4055071 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 27 Mar 2025 19:18:45 +0000 Subject: [PATCH 07/12] chore: Include `Version` in private signatures --- narwhals/functions.py | 7 ++++--- narwhals/stable/v1/__init__.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index a91d9fd8b6..b61cb9ac6b 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -367,14 +367,15 @@ def from_dict( | 1 2 4 | └──────────────────┘ """ - return _from_dict_impl(data, schema, backend=backend) + return _from_dict_impl(data, schema, backend=backend, version=Version.V1) def _from_dict_impl( data: Mapping[str, Any], - schema: Mapping[str, DType] | Schema | None = None, + schema: Mapping[str, DType] | Schema | None, *, - backend: ModuleType | Implementation | str | None = None, + backend: ModuleType | Implementation | str | None, + version: Version, ) -> DataFrame[Any]: from narwhals.series import Series diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index e2a1037774..d07c9c5c74 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -2296,7 +2296,7 @@ def from_dict( A new DataFrame. """ return _stableify( # type: ignore[no-any-return] - _from_dict_impl(data, schema, backend=backend) + _from_dict_impl(data, schema, backend=backend, version=Version.V1) ) From f14a05e7c54d27cb3bf6e3926d96df6ef1eb4a62 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 27 Mar 2025 19:30:38 +0000 Subject: [PATCH 08/12] refactor: Update `_from_dict_impl` --- narwhals/functions.py | 79 ++++++++++--------------------------------- 1 file changed, 18 insertions(+), 61 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index b61cb9ac6b..4f6afb53a5 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -23,10 +23,10 @@ from narwhals._expression_parsing import extract_compliant from narwhals._expression_parsing import infer_kind from narwhals._expression_parsing import is_scalar_like +from narwhals.dependencies import is_narwhals_series from narwhals.dependencies import is_numpy_array from narwhals.dependencies import is_numpy_array_2d from narwhals.expr import Expr -from narwhals.schema import Schema from narwhals.series import Series from narwhals.translate import from_native from narwhals.translate import to_native @@ -51,12 +51,11 @@ from narwhals._compliant import CompliantExpr from narwhals._compliant import CompliantNamespace - from narwhals._pandas_like.series import PandasLikeSeries from narwhals.dataframe import DataFrame from narwhals.dataframe import LazyFrame from narwhals.dtypes import DType + from narwhals.schema import Schema from narwhals.series import Series - from narwhals.typing import DTypeBackend from narwhals.typing import IntoDataFrameT from narwhals.typing import IntoExpr from narwhals.typing import IntoFrameT @@ -377,74 +376,26 @@ def _from_dict_impl( backend: ModuleType | Implementation | str | None, version: Version, ) -> DataFrame[Any]: - from narwhals.series import Series - if not data: msg = "from_dict cannot be called with empty dictionary" raise ValueError(msg) if backend is None: for val in data.values(): - if isinstance(val, Series): + if is_narwhals_series(val): native_namespace = val.__native_namespace__() break else: msg = "Calling `from_dict` without `backend` is only supported if all input values are already Narwhals Series" raise TypeError(msg) data = {key: to_native(value, pass_through=True) for key, value in data.items()} - eager_backend = Implementation.from_native_namespace(native_namespace) - else: - eager_backend = Implementation.from_backend(backend) - native_namespace = eager_backend.to_native_namespace() - - supported_eager_backends = ( - Implementation.POLARS, - Implementation.PANDAS, - Implementation.PYARROW, - Implementation.MODIN, - Implementation.CUDF, - ) - if eager_backend is not None and eager_backend not in supported_eager_backends: - msg = f"Unsupported `backend` value.\nExpected one of {supported_eager_backends} or None, got: {eager_backend}." - raise ValueError(msg) - if eager_backend is Implementation.POLARS: - schema_pl = Schema(schema).to_polars() if schema else None - native_frame = native_namespace.from_dict(data, schema=schema_pl) - elif eager_backend.is_pandas_like(): - from narwhals._pandas_like.utils import align_and_extract_native - - aligned_data = {} - left_most_series = None - for key, native_series in data.items(): - if isinstance(native_series, native_namespace.Series): - compliant_series = from_native( - native_series, series_only=True - )._compliant_series - if left_most_series is None: - left_most_series = cast("PandasLikeSeries", compliant_series) - aligned_data[key] = native_series - else: - aligned_data[key] = align_and_extract_native( - left_most_series, compliant_series - )[1] - else: - aligned_data[key] = native_series - - native_frame = native_namespace.DataFrame.from_dict(aligned_data) - - if schema: - from narwhals._pandas_like.utils import get_dtype_backend - - it: Iterable[DTypeBackend] = ( - get_dtype_backend(native_type, eager_backend) - for native_type in native_frame.dtypes - ) - pd_schema = Schema(schema).to_pandas(it) - native_frame = native_frame.astype(pd_schema) - - elif eager_backend is Implementation.PYARROW: - pa_schema = Schema(schema).to_arrow() if schema is not None else schema - native_frame = native_namespace.table(data, schema=pa_schema) - else: # pragma: no cover + backend = native_namespace + implementation = Implementation.from_backend(backend) + if is_eager_allowed(implementation): + ns = _into_compliant_namespace(implementation, version) + frame = ns._dataframe.from_dict(data, schema=schema, context=ns) + return from_native(frame, eager_only=True) + elif implementation is Implementation.UNKNOWN: # pragma: no cover + native_namespace = implementation.to_native_namespace() try: # implementation is UNKNOWN, Narwhals extension using this feature should # implement `from_dict` function in the top-level namespace. @@ -452,7 +403,13 @@ def _from_dict_impl( except AttributeError as e: msg = "Unknown namespace is expected to implement `from_dict` function." raise AttributeError(msg) from e - return from_native(native_frame, eager_only=True) + return from_native(native_frame, eager_only=True) + msg = ( + f"Unsupported `backend` value.\nExpected one of " + f"{Implementation.POLARS, Implementation.PANDAS, Implementation.PYARROW, Implementation.MODIN, Implementation.CUDF} " + f"or None, got: {implementation}." + ) + raise ValueError(msg) @deprecate_native_namespace(warn_version="1.31.0", required=True) From 836038fb0a1f894f1e4f9e693c9f4624505c0e0a Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 27 Mar 2025 19:41:23 +0000 Subject: [PATCH 09/12] refactor: Split out `_from_dict_no_backend` No preference on the name - but I find this a lot easier to read --- narwhals/functions.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index 4f6afb53a5..8daead2c53 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -380,15 +380,7 @@ def _from_dict_impl( msg = "from_dict cannot be called with empty dictionary" raise ValueError(msg) if backend is None: - for val in data.values(): - if is_narwhals_series(val): - native_namespace = val.__native_namespace__() - break - else: - msg = "Calling `from_dict` without `backend` is only supported if all input values are already Narwhals Series" - raise TypeError(msg) - data = {key: to_native(value, pass_through=True) for key, value in data.items()} - backend = native_namespace + data, backend = _from_dict_no_backend(data) implementation = Implementation.from_backend(backend) if is_eager_allowed(implementation): ns = _into_compliant_namespace(implementation, version) @@ -412,6 +404,20 @@ def _from_dict_impl( raise ValueError(msg) +def _from_dict_no_backend( + data: Mapping[str, Series[Any] | Any], / +) -> tuple[dict[str, Series[Any] | Any], ModuleType]: + for val in data.values(): + if is_narwhals_series(val): + native_namespace = val.__native_namespace__() + break + else: + msg = "Calling `from_dict` without `backend` is only supported if all input values are already Narwhals Series" + raise TypeError(msg) + data = {key: to_native(value, pass_through=True) for key, value in data.items()} + return data, native_namespace + + @deprecate_native_namespace(warn_version="1.31.0", required=True) def from_numpy( data: _2DArray, From d9bf5bd7449f706f6192dc82c4fb2bc69f1452d7 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 27 Mar 2025 19:46:27 +0000 Subject: [PATCH 10/12] fix: `3.8` compat https://github.com/narwhals-dev/narwhals/actions/runs/14115367918/job/39544135815?pr=2304 --- narwhals/_translate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_translate.py b/narwhals/_translate.py index 9f4438bd8b..3c74f74aff 100644 --- a/narwhals/_translate.py +++ b/narwhals/_translate.py @@ -74,7 +74,7 @@ def from_iterable( ToDictDT_co = TypeVar( - "ToDictDT_co", bound=Mapping[str, Any], covariant=True, default=dict[str, Any] + "ToDictDT_co", bound=Mapping[str, Any], covariant=True, default="dict[str, Any]" ) FromDictDT_contra = TypeVar( "FromDictDT_contra", From 94a13f9f2508365ac5b987403250c49fb178e41b Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 27 Mar 2025 20:47:52 +0000 Subject: [PATCH 11/12] perf: Skip `from_native` when we know `PandasLikeSeries` Resolves https://github.com/narwhals-dev/narwhals/pull/2304#discussion_r2017511875 --- narwhals/_pandas_like/dataframe.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 71cf0ad28b..d10ec93b93 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -124,9 +124,10 @@ def from_dict( schema: Mapping[str, DType] | Schema | None, ) -> Self: from narwhals.schema import Schema - from narwhals.translate import from_native implementation = context._implementation + backend_version = context._backend_version + version = context._version ns = implementation.to_native_namespace() Series = cast("type[pd.Series[Any]]", ns.Series) # noqa: N806 DataFrame = cast("type[pd.DataFrame]", ns.DataFrame) # noqa: N806 @@ -134,9 +135,14 @@ def from_dict( left_most: PandasLikeSeries | None = None for name, series in data.items(): if isinstance(series, Series): - compliant = from_native(series, series_only=True)._compliant_series + compliant = PandasLikeSeries( + series, + implementation=implementation, + backend_version=backend_version, + version=version, + ) if left_most is None: - left_most = cast("PandasLikeSeries", compliant) + left_most = compliant aligned_data[name] = series else: aligned_data[name] = align_and_extract_native(left_most, compliant)[1] @@ -152,8 +158,8 @@ def from_dict( return cls( native, implementation=implementation, - backend_version=context._backend_version, - version=context._version, + backend_version=backend_version, + version=version, validate_column_names=True, ) From 56a7222ca3d30e2eb353eba6094c49a5e6def8a2 Mon Sep 17 00:00:00 2001 From: Dan Redding <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 27 Mar 2025 22:37:56 +0000 Subject: [PATCH 12/12] fix: version Co-authored-by: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com> --- narwhals/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index 14ab3ddaaa..9523ea16bb 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -328,7 +328,7 @@ def from_dict( | 1 2 4 | └──────────────────┘ """ - return _from_dict_impl(data, schema, backend=backend, version=Version.V1) + return _from_dict_impl(data, schema, backend=backend, version=Version.MAIN) def _from_dict_impl(