diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index 22650ab25f..6674e45853 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -93,6 +93,26 @@ def __init__( self._version = version validate_backend_version(self._implementation, self._backend_version) + @classmethod + def from_dict( + cls, + data: Mapping[str, Any], + /, + *, + context: _FullContext, + schema: Mapping[str, DType] | Schema | None, + ) -> Self: + from narwhals.schema import Schema + + pa_schema = Schema(schema).to_arrow() if schema is not None else schema + native = pa.Table.from_pydict(data, schema=pa_schema) + return cls( + native, + backend_version=context._backend_version, + version=context._version, + validate_column_names=True, + ) + @classmethod def from_numpy( cls, diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py index 76f4fdc333..6c35c7b972 100644 --- a/narwhals/_compliant/dataframe.py +++ b/narwhals/_compliant/dataframe.py @@ -18,6 +18,7 @@ from narwhals._compliant.typing import EagerSeriesT from narwhals._compliant.typing import NativeFrameT_co from narwhals._expression_parsing import evaluate_output_names_and_aliases +from narwhals._translate import DictConvertible from narwhals._translate import NumpyConvertible from narwhals.utils import Version from narwhals.utils import _StoresNative @@ -47,9 +48,12 @@ T = TypeVar("T") +_ToDict: TypeAlias = "dict[str, CompliantSeriesT] | dict[str, list[Any]]" # noqa: PYI047 + class CompliantDataFrame( NumpyConvertible["_2DArray", "_2DArray"], + DictConvertible["_ToDict[CompliantSeriesT]", Mapping[str, Any]], _StoresNative[NativeFrameT_co], Sized, Protocol[CompliantSeriesT, CompliantExprT_contra, NativeFrameT_co], @@ -62,6 +66,15 @@ class CompliantDataFrame( def __narwhals_dataframe__(self) -> Self: ... def __narwhals_namespace__(self) -> Any: ... @classmethod + def from_dict( + cls, + data: Mapping[str, Any], + /, + *, + context: _FullContext, + schema: Mapping[str, DType] | Schema | None, + ) -> Self: ... + @classmethod def from_numpy( cls, data: _2DArray, diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 7fe858f379..d10ec93b93 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -16,6 +16,7 @@ from narwhals._compliant import EagerDataFrame from narwhals._pandas_like.series import PANDAS_TO_NUMPY_DTYPE_MISSING from narwhals._pandas_like.series import PandasLikeSeries +from narwhals._pandas_like.utils import align_and_extract_native from narwhals._pandas_like.utils import align_series_full_broadcast from narwhals._pandas_like.utils import check_column_names_are_unique from narwhals._pandas_like.utils import convert_str_slice_to_int_slice @@ -113,6 +114,55 @@ def __init__( if validate_column_names: check_column_names_are_unique(native_dataframe.columns) + @classmethod + def from_dict( + cls, + data: Mapping[str, Any], + /, + *, + context: _FullContext, + schema: Mapping[str, DType] | Schema | None, + ) -> Self: + from narwhals.schema import Schema + + implementation = context._implementation + backend_version = context._backend_version + version = context._version + ns = implementation.to_native_namespace() + Series = cast("type[pd.Series[Any]]", ns.Series) # noqa: N806 + DataFrame = cast("type[pd.DataFrame]", ns.DataFrame) # noqa: N806 + aligned_data: dict[str, pd.Series[Any] | Any] = {} + left_most: PandasLikeSeries | None = None + for name, series in data.items(): + if isinstance(series, Series): + compliant = PandasLikeSeries( + series, + implementation=implementation, + backend_version=backend_version, + version=version, + ) + if left_most is None: + left_most = compliant + aligned_data[name] = series + else: + aligned_data[name] = align_and_extract_native(left_most, compliant)[1] + else: + aligned_data[name] = series + + native = DataFrame.from_dict(aligned_data) + if schema: + it: Iterable[DTypeBackend] = ( + get_dtype_backend(dtype, implementation) for dtype in native.dtypes + ) + native = native.astype(Schema(schema).to_pandas(it)) + return cls( + native, + implementation=implementation, + backend_version=backend_version, + version=version, + validate_column_names=True, + ) + @classmethod def from_numpy( cls, diff --git a/narwhals/_polars/dataframe.py b/narwhals/_polars/dataframe.py index 02c148afc0..febe537999 100644 --- a/narwhals/_polars/dataframe.py +++ b/narwhals/_polars/dataframe.py @@ -94,6 +94,23 @@ def __init__( self._version = version validate_backend_version(self._implementation, self._backend_version) + @classmethod + def from_dict( + cls, + data: Mapping[str, Any], + /, + *, + context: _FullContext, + schema: Mapping[str, DType] | Schema | None, + ) -> Self: + from narwhals.schema import Schema + + pl_schema = Schema(schema).to_polars() if schema is not None else schema + native = pl.from_dict(data, pl_schema) + return cls( + native, backend_version=context._backend_version, version=context._version + ) + @classmethod def from_numpy( cls, diff --git a/narwhals/_translate.py b/narwhals/_translate.py index 11aa6afc40..3c74f74aff 100644 --- a/narwhals/_translate.py +++ b/narwhals/_translate.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING from typing import Any from typing import Iterable +from typing import Mapping from typing import Protocol if TYPE_CHECKING: @@ -70,3 +71,30 @@ class FromIterable(Protocol[FromIterableT_contra]): def from_iterable( cls, data: Iterable[FromIterableT_contra], *args: Any, **kwds: Any ) -> Self: ... + + +ToDictDT_co = TypeVar( + "ToDictDT_co", bound=Mapping[str, Any], covariant=True, default="dict[str, Any]" +) +FromDictDT_contra = TypeVar( + "FromDictDT_contra", + bound=Mapping[str, Any], + contravariant=True, + default=Mapping[str, Any], +) + + +class ToDict(Protocol[ToDictDT_co]): + def to_dict(self, *args: Any, **kwds: Any) -> ToDictDT_co: ... + + +class FromDict(Protocol[FromDictDT_contra]): + @classmethod + def from_dict(cls, data: FromDictDT_contra, *args: Any, **kwds: Any) -> Self: ... + + +class DictConvertible( + ToDict[ToDictDT_co], + FromDict[FromDictDT_contra], + Protocol[ToDictDT_co, FromDictDT_contra], +): ... diff --git a/narwhals/functions.py b/narwhals/functions.py index b0e4a12d84..9523ea16bb 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -23,10 +23,10 @@ from narwhals._expression_parsing import extract_compliant from narwhals._expression_parsing import infer_kind from narwhals._expression_parsing import is_scalar_like +from narwhals.dependencies import is_narwhals_series from narwhals.dependencies import is_numpy_array from narwhals.dependencies import is_numpy_array_2d from narwhals.expr import Expr -from narwhals.schema import Schema from narwhals.series import Series from narwhals.translate import from_native from narwhals.translate import to_native @@ -50,12 +50,11 @@ from narwhals._compliant import CompliantExpr from narwhals._compliant import CompliantNamespace - from narwhals._pandas_like.series import PandasLikeSeries from narwhals.dataframe import DataFrame from narwhals.dataframe import LazyFrame from narwhals.dtypes import DType + from narwhals.schema import Schema from narwhals.series import Series - from narwhals.typing import DTypeBackend from narwhals.typing import IntoDataFrameT from narwhals.typing import IntoExpr from narwhals.typing import IntoFrameT @@ -329,83 +328,28 @@ def from_dict( | 1 2 4 | └──────────────────┘ """ - return _from_dict_impl(data, schema, backend=backend) + return _from_dict_impl(data, schema, backend=backend, version=Version.MAIN) def _from_dict_impl( data: Mapping[str, Any], - schema: Mapping[str, DType] | Schema | None = None, + schema: Mapping[str, DType] | Schema | None, *, - backend: ModuleType | Implementation | str | None = None, + backend: ModuleType | Implementation | str | None, + version: Version, ) -> DataFrame[Any]: - from narwhals.series import Series - if not data: msg = "from_dict cannot be called with empty dictionary" raise ValueError(msg) if backend is None: - for val in data.values(): - if isinstance(val, Series): - native_namespace = val.__native_namespace__() - break - else: - msg = "Calling `from_dict` without `backend` is only supported if all input values are already Narwhals Series" - raise TypeError(msg) - data = {key: to_native(value, pass_through=True) for key, value in data.items()} - eager_backend = Implementation.from_native_namespace(native_namespace) - else: - eager_backend = Implementation.from_backend(backend) - native_namespace = eager_backend.to_native_namespace() - - supported_eager_backends = ( - Implementation.POLARS, - Implementation.PANDAS, - Implementation.PYARROW, - Implementation.MODIN, - Implementation.CUDF, - ) - if eager_backend is not None and eager_backend not in supported_eager_backends: - msg = f"Unsupported `backend` value.\nExpected one of {supported_eager_backends} or None, got: {eager_backend}." - raise ValueError(msg) - if eager_backend is Implementation.POLARS: - schema_pl = Schema(schema).to_polars() if schema else None - native_frame = native_namespace.from_dict(data, schema=schema_pl) - elif eager_backend.is_pandas_like(): - from narwhals._pandas_like.utils import align_and_extract_native - - aligned_data = {} - left_most_series = None - for key, native_series in data.items(): - if isinstance(native_series, native_namespace.Series): - compliant_series = from_native( - native_series, series_only=True - )._compliant_series - if left_most_series is None: - left_most_series = cast("PandasLikeSeries", compliant_series) - aligned_data[key] = native_series - else: - aligned_data[key] = align_and_extract_native( - left_most_series, compliant_series - )[1] - else: - aligned_data[key] = native_series - - native_frame = native_namespace.DataFrame.from_dict(aligned_data) - - if schema: - from narwhals._pandas_like.utils import get_dtype_backend - - it: Iterable[DTypeBackend] = ( - get_dtype_backend(native_type, eager_backend) - for native_type in native_frame.dtypes - ) - pd_schema = Schema(schema).to_pandas(it) - native_frame = native_frame.astype(pd_schema) - - elif eager_backend is Implementation.PYARROW: - pa_schema = Schema(schema).to_arrow() if schema is not None else schema - native_frame = native_namespace.table(data, schema=pa_schema) - else: # pragma: no cover + data, backend = _from_dict_no_backend(data) + implementation = Implementation.from_backend(backend) + if is_eager_allowed(implementation): + ns = _into_compliant_namespace(implementation, version) + frame = ns._dataframe.from_dict(data, schema=schema, context=ns) + return from_native(frame, eager_only=True) + elif implementation is Implementation.UNKNOWN: # pragma: no cover + native_namespace = implementation.to_native_namespace() try: # implementation is UNKNOWN, Narwhals extension using this feature should # implement `from_dict` function in the top-level namespace. @@ -413,7 +357,27 @@ def _from_dict_impl( except AttributeError as e: msg = "Unknown namespace is expected to implement `from_dict` function." raise AttributeError(msg) from e - return from_native(native_frame, eager_only=True) + return from_native(native_frame, eager_only=True) + msg = ( + f"Unsupported `backend` value.\nExpected one of " + f"{Implementation.POLARS, Implementation.PANDAS, Implementation.PYARROW, Implementation.MODIN, Implementation.CUDF} " + f"or None, got: {implementation}." + ) + raise ValueError(msg) + + +def _from_dict_no_backend( + data: Mapping[str, Series[Any] | Any], / +) -> tuple[dict[str, Series[Any] | Any], ModuleType]: + for val in data.values(): + if is_narwhals_series(val): + native_namespace = val.__native_namespace__() + break + else: + msg = "Calling `from_dict` without `backend` is only supported if all input values are already Narwhals Series" + raise TypeError(msg) + data = {key: to_native(value, pass_through=True) for key, value in data.items()} + return data, native_namespace @deprecate_native_namespace(warn_version="1.31.0", required=True) diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index e2a1037774..d07c9c5c74 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -2296,7 +2296,7 @@ def from_dict( A new DataFrame. """ return _stableify( # type: ignore[no-any-return] - _from_dict_impl(data, schema, backend=backend) + _from_dict_impl(data, schema, backend=backend, version=Version.V1) )