Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 59 additions & 24 deletions narwhals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from narwhals.utils import Implementation
from narwhals.utils import Version
from narwhals.utils import flatten
from narwhals.utils import issue_deprecation_warning
from narwhals.utils import parse_version
from narwhals.utils import validate_laziness

Expand Down Expand Up @@ -374,6 +375,7 @@ def from_dict(
data: dict[str, Any],
schema: dict[str, DType] | Schema | None = None,
*,
backend: ModuleType | Implementation | str | None = None,
native_namespace: ModuleType | None = None,
) -> DataFrame[Any]:
"""Instantiate DataFrame from dictionary.
Expand All @@ -388,9 +390,22 @@ def from_dict(
Arguments:
data: Dictionary to create DataFrame from.
schema: The DataFrame schema as Schema or dict of {name: type}.
native_namespace: The native library to use for DataFrame creation. Only
backend: specifies which eager backend to instantiate to. Only
necessary if inputs are not Narwhals Series.

`backend` can be specified in various ways:

- As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
`POLARS`, `MODIN` or `CUDF`.
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
native_namespace: The native library to use for DataFrame creation.

**Deprecated** (v1.26.0):
Please use `backend` instead. Note that `native_namespace` is still available
(and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
see [perfect backwards compatibility policy](../backcompat.md/).

Returns:
A new DataFrame.

Expand All @@ -400,24 +415,20 @@ def from_dict(
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}

Let's create a new dataframe of the same class as the dataframe we started with, from a dict of new data:
Let's create a new dataframe and specify the backend argument.

>>> def agnostic_from_dict(df_native: IntoFrameT) -> IntoFrameT:
... new_data = {"c": [5, 2], "d": [1, 4]}
... native_namespace = nw.get_native_namespace(df_native)
... return nw.from_dict(
... new_data, native_namespace=native_namespace
... ).to_native()
>>> def agnostic_from_dict(backend: str) -> IntoFrameT:
... data = {"c": [5, 2], "d": [1, 4]}
... return nw.from_dict(data, backend=backend).to_native()

Let's see what happens when passing pandas, Polars or PyArrow as the backend:

>>> agnostic_from_dict(pd.DataFrame(data))
>>> agnostic_from_dict(backend="pandas")
c d
0 5 1
1 2 4
>>> agnostic_from_dict(pl.DataFrame(data))
>>> agnostic_from_dict(backend="polars")
shape: (2, 2)
┌─────┬─────┐
│ c   ┆ d   │
Expand All @@ -427,27 +438,38 @@ def from_dict(
│ 5   ┆ 1   │
│ 2   ┆ 4   │
└─────┴─────┘
>>> agnostic_from_dict(pa.table(data))
>>> agnostic_from_dict(backend="pyarrow")
pyarrow.Table
c: int64
d: int64
----
c: [[5,2]]
d: [[1,4]]
"""
if native_namespace is not None and backend is None: # pragma: no cover
msg = (
"Please use `backend` instead. Note that `native_namespace` is still available"
"(and won't emit a deprecation warning) if you use `narwhals.stable.v1`, "
"see [perfect backwards compatibility policy](../backcompat.md/)."
)
issue_deprecation_warning(msg, _version="1.26.0")
backend = native_namespace
elif native_namespace is not None and backend is not None:
msg = "Can't pass both `native_namespace` and `backend`"
raise ValueError(msg)
return _from_dict_impl(
data,
schema,
native_namespace=native_namespace,
backend=backend,
version=Version.MAIN,
)


def _from_dict_impl(
def _from_dict_impl( # noqa: PLR0915
data: dict[str, Any],
schema: dict[str, DType] | Schema | None = None,
*,
native_namespace: ModuleType | None = None,
backend: ModuleType | Implementation | str | None = None,
version: Version,
) -> DataFrame[Any]:
from narwhals.series import Series
Expand All @@ -456,18 +478,31 @@ def _from_dict_impl(
if not data:
msg = "from_dict cannot be called with empty dictionary"
raise ValueError(msg)
if native_namespace is None:
if backend is None:
for val in data.values():
if isinstance(val, Series):
native_namespace = val.__native_namespace__()
break
else:
msg = "Calling `from_dict` without `native_namespace` is only supported if all input values are already Narwhals Series"
msg = "Calling `from_dict` without `backend` is only supported if all input values are already Narwhals Series"
raise TypeError(msg)
data = {key: to_native(value, pass_through=True) for key, value in data.items()}
implementation = Implementation.from_native_namespace(native_namespace)
eager_backend = Implementation.from_native_namespace(native_namespace)
else:
eager_backend = Implementation.from_backend(backend)
native_namespace = eager_backend.to_native_namespace()

if implementation is Implementation.POLARS:
supported_eager_backends = (
Implementation.POLARS,
Implementation.PANDAS,
Implementation.PYARROW,
Implementation.MODIN,
Implementation.CUDF,
)
if eager_backend is not None and eager_backend not in supported_eager_backends:
msg = f"Unsupported `backend` value.\nExpected one of {supported_eager_backends} or None, got: {eager_backend}."
raise ValueError(msg)
if eager_backend is Implementation.POLARS:
if schema:
from narwhals._polars.utils import (
narwhals_to_native_dtype as polars_narwhals_to_native_dtype,
Expand All @@ -481,11 +516,11 @@ def _from_dict_impl(
schema_pl = None

native_frame = native_namespace.from_dict(data, schema=schema_pl)
elif implementation in {
elif eager_backend in (
Implementation.PANDAS,
Implementation.MODIN,
Implementation.CUDF,
}:
):
aligned_data = {}
left_most_series = None
for key, native_series in data.items():
Expand Down Expand Up @@ -515,16 +550,16 @@ def _from_dict_impl(
schema = {
name: pandas_like_narwhals_to_native_dtype(
dtype=schema[name],
dtype_backend=get_dtype_backend(native_type, implementation),
implementation=implementation,
dtype_backend=get_dtype_backend(native_type, eager_backend),
implementation=eager_backend,
backend_version=backend_version,
version=version,
)
for name, native_type in native_frame.dtypes.items()
}
native_frame = native_frame.astype(schema)

elif implementation is Implementation.PYARROW:
elif eager_backend is Implementation.PYARROW:
if schema:
from narwhals._arrow.utils import (
narwhals_to_native_dtype as arrow_narwhals_to_native_dtype,
Expand Down
23 changes: 21 additions & 2 deletions narwhals/stable/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2162,6 +2162,7 @@ def from_dict(
data: dict[str, Any],
schema: dict[str, DType] | Schema | None = None,
*,
backend: ModuleType | Implementation | str | None = None,
native_namespace: ModuleType | None = None,
) -> DataFrame[Any]:
"""Instantiate DataFrame from dictionary.
Expand All @@ -2176,17 +2177,35 @@ def from_dict(
Arguments:
data: Dictionary to create DataFrame from.
schema: The DataFrame schema as Schema or dict of {name: type}.
native_namespace: The native library to use for DataFrame creation. Only
backend: specifies which eager backend to instantiate to. Only
necessary if inputs are not Narwhals Series.

`backend` can be specified in various ways:

- As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
`POLARS`, `MODIN` or `CUDF`.
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
native_namespace: The native library to use for DataFrame creation.

**Deprecated** (v1.26.0):
Please use `backend` instead. Note that `native_namespace` is still available
(and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
see [perfect backwards compatibility policy](../backcompat.md/).

Returns:
A new DataFrame.
"""
if native_namespace is not None and backend is None: # pragma: no cover
backend = native_namespace
elif native_namespace is not None and backend is not None:
msg = "Can't pass both `native_namespace` and `backend`"
raise ValueError(msg)
return _stableify( # type: ignore[no-any-return]
_from_dict_impl(
data,
schema,
native_namespace=native_namespace,
backend=backend,
version=Version.V1,
)
)
Expand Down
45 changes: 35 additions & 10 deletions narwhals/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,16 +161,41 @@ def to_native_namespace(self: Self) -> ModuleType:
Returns:
Native module.
"""
mapping = {
Implementation.PANDAS: get_pandas(),
Implementation.MODIN: get_modin(),
Implementation.CUDF: get_cudf(),
Implementation.PYARROW: get_pyarrow(),
Implementation.PYSPARK: get_pyspark_sql(),
Implementation.POLARS: get_polars(),
Implementation.DASK: get_dask_dataframe(),
}
return mapping[self] # type: ignore[no-any-return]
if self is Implementation.PANDAS:
import pandas as pd # ignore-banned-import

return pd # type: ignore[no-any-return]
if self is Implementation.MODIN:
import modin.pandas

return modin.pandas # type: ignore[no-any-return]
if self is Implementation.CUDF:
import cudf # ignore-banned-import

return cudf # type: ignore[no-any-return]
if self is Implementation.PYARROW:
import pyarrow as pa # ignore-banned-import

return pa # type: ignore[no-any-return]
if self is Implementation.PYSPARK:
import pyspark.sql

return pyspark.sql # type: ignore[no-any-return]
if self is Implementation.POLARS:
import polars as pl # ignore-banned-import

return pl
if self is Implementation.DASK:
import dask.dataframe # ignore-banned-import

return dask.dataframe # type: ignore[no-any-return]

if self is Implementation.DUCKDB:
import duckdb # ignore-banned-import

return duckdb # type: ignore[no-any-return]
msg = "Not supported Implementation" # pragma: no cover
raise AssertionError(msg)

def is_pandas(self: Self) -> bool:
"""Return whether implementation is pandas.
Expand Down
Loading
Loading