Skip to content
Draft
69 changes: 36 additions & 33 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
check_column_names_are_unique,
convert_str_slice_to_int_slice,
generate_temporary_column_name,
is_sequence_of,
not_implemented,
parse_columns_to_drop,
scale_bytes,
Expand Down Expand Up @@ -53,6 +54,7 @@
from narwhals._utils import Version, _LimitedContext
from narwhals.dtypes import DType
from narwhals.typing import (
IntoNullableSchema,
IntoSchema,
JoinStrategy,
SizedMultiIndexSelector,
Expand Down Expand Up @@ -119,36 +121,39 @@ def from_dict(
/,
*,
context: _LimitedContext,
schema: IntoSchema | Mapping[str, DType | None] | None,
schema: IntoSchema | IntoNullableSchema | None = None,
) -> Self:
from narwhals._utils import NullableSchema

if not schema and not data:
return cls.from_native(pa.table({}), context=context)
if not schema:
return cls.from_native(pa.table(data), context=context) # type: ignore[arg-type]
if not any(dtype is None for dtype in schema.values()):
from narwhals.schema import Schema
nullable_schema = NullableSchema(schema)

if nullable_schema.is_nullable:
if context._implementation._backend_version() < (14,):
msg = "Passing `None` dtype in `from_dict` requires PyArrow>=14"
raise NotImplementedError(msg)
res = pa.table(
{
name: pa.chunked_array( # type: ignore[misc]
[data[name] if data else []],
type=narwhals_to_native_dtype(nw_dtype, version=context._version)
if nw_dtype is not None
else None,
)
for name, nw_dtype in nullable_schema.items()
}
)
return cls.from_native(pa.table(res), context=context)

pa_schema = Schema(cast("IntoSchema", schema)).to_arrow()
if pa_schema and not data:
native = pa_schema.empty_table()
else:
native = pa.Table.from_pydict(data, schema=pa_schema)
return cls.from_native(native, context=context)
if context._implementation._backend_version() < (14,):
msg = "Passing `None` dtype in `from_dict` requires PyArrow>=14"
raise NotImplementedError(msg)
res = pa.table(
{
name: pa.chunked_array( # type: ignore[misc]
[data[name] if data else []],
type=narwhals_to_native_dtype(nw_dtype, version=context._version)
if nw_dtype is not None
else None,
)
for name, nw_dtype in schema.items()
}
)
return cls.from_native(pa.table(res), context=context)
pa_schema = nullable_schema.to_schema().to_arrow()
if pa_schema and not data:
native = pa_schema.empty_table()
else:
native = pa.Table.from_pydict(data, schema=pa_schema)
return cls.from_native(native, context=context)

@classmethod
def from_dicts(
Expand All @@ -157,17 +162,15 @@ def from_dicts(
/,
*,
context: _LimitedContext,
schema: IntoSchema | Mapping[str, DType | None] | None,
schema: IntoSchema | IntoNullableSchema | None = None,
) -> Self:
from narwhals.schema import Schema
from narwhals._utils import NullableSchema

if schema and any(dtype is None for dtype in schema.values()):
if schema and (nullable_schema := NullableSchema(schema)).is_nullable:
msg = "`from_dicts` with `schema` where any dtype is `None` is not supported for PyArrow."
raise NotImplementedError(msg)
pa_schema = (
Schema(cast("IntoSchema", schema)).to_arrow()
if schema is not None
else schema
nullable_schema.to_schema().to_arrow() if schema is not None else schema
)
if pa_schema and not data:
native = pa_schema.empty_table()
Expand Down Expand Up @@ -195,10 +198,10 @@ def from_numpy(
from narwhals.schema import Schema

arrays = [pa.array(val) for val in data.T]
if isinstance(schema, (Mapping, Schema)):
native = pa.Table.from_arrays(arrays, schema=Schema(schema).to_arrow())
else:
if is_sequence_of(schema, str) or schema is None:
native = pa.Table.from_arrays(arrays, cls._numpy_column_names(data, schema))
else:
native = pa.Table.from_arrays(arrays, schema=Schema(schema).to_arrow())
return cls.from_native(native, context=context)

def __narwhals_namespace__(self) -> ArrowNamespace:
Expand Down
5 changes: 3 additions & 2 deletions narwhals/_compliant/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
from narwhals.exceptions import ColumnNotFoundError
from narwhals.typing import (
AsofJoinStrategy,
IntoNullableSchema,
IntoSchema,
JoinStrategy,
MultiColSelector,
Expand Down Expand Up @@ -191,7 +192,7 @@ def from_dict(
/,
*,
context: CompliantNamespaceAny,
schema: IntoSchema | Mapping[str, DType | None] | None,
schema: IntoSchema | IntoNullableSchema | None,
) -> Self: ...
@classmethod
def from_dicts(
Expand All @@ -200,7 +201,7 @@ def from_dicts(
/,
*,
context: _LimitedContext,
schema: IntoSchema | Mapping[str, DType | None] | None,
schema: IntoSchema | IntoNullableSchema | None,
) -> Self: ...
@classmethod
def from_numpy(
Expand Down
25 changes: 16 additions & 9 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

from collections.abc import Iterable, Iterator, Mapping, Sequence
from itertools import chain, product
from typing import TYPE_CHECKING, Any, Callable, Literal, cast, overload

Expand Down Expand Up @@ -28,6 +27,7 @@
check_column_names_are_unique,
exclude_column_names,
generate_temporary_column_name,
is_sequence_of,
parse_columns_to_drop,
scale_bytes,
zip_strict,
Expand All @@ -36,6 +36,7 @@
from narwhals.exceptions import InvalidOperationError, ShapeError

if TYPE_CHECKING:
from collections.abc import Iterable, Iterator, Mapping, Sequence
from io import BytesIO
from pathlib import Path
from types import ModuleType
Expand All @@ -56,6 +57,7 @@
from narwhals.typing import (
AsofJoinStrategy,
DTypeBackend,
IntoNullableSchema,
IntoSchema,
JoinStrategy,
PivotAgg,
Expand Down Expand Up @@ -148,8 +150,11 @@ def from_dict(
/,
*,
context: _LimitedContext,
schema: IntoSchema | Mapping[str, DType | None] | None,
schema: IntoSchema | IntoNullableSchema | None = None,
) -> Self:
from narwhals._utils import NullableSchema

schema = NullableSchema(schema) if schema is not None else None
implementation = context._implementation
pdx = implementation.to_native_namespace()
Series = cast("type[pd.Series[Any]]", pdx.Series)
Expand Down Expand Up @@ -196,8 +201,11 @@ def from_dicts(
/,
*,
context: _LimitedContext,
schema: IntoSchema | Mapping[str, DType | None] | None,
schema: IntoSchema | IntoNullableSchema | None = None,
) -> Self:
from narwhals._utils import NullableSchema

schema = NullableSchema(schema) if schema is not None else None
implementation = context._implementation
ns = implementation.to_native_namespace()
DataFrame = cast("type[pd.DataFrame]", ns.DataFrame)
Expand Down Expand Up @@ -250,16 +258,15 @@ def from_numpy(

implementation = context._implementation
DataFrame: Constructor = implementation.to_native_namespace().DataFrame
if isinstance(schema, (Mapping, Schema)):
if is_sequence_of(schema, str) or schema is None:
native = DataFrame(data, columns=cls._numpy_column_names(data, schema))
else:
schema = Schema(schema)
it: Iterable[DTypeBackend] = (
get_dtype_backend(native_type, implementation)
for native_type in schema.values()
)
native = DataFrame(data, columns=schema.keys()).astype(
Schema(schema).to_pandas(it)
)
else:
native = DataFrame(data, columns=cls._numpy_column_names(data, schema))
native = DataFrame(data, columns=schema.keys()).astype(schema.to_pandas(it))
return cls.from_native(native, context=context)

def __narwhals_dataframe__(self) -> Self:
Expand Down
20 changes: 13 additions & 7 deletions narwhals/_polars/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
is_index_selector,
is_range,
is_sequence_like,
is_sequence_of,
is_slice_index,
is_slice_none,
parse_columns_to_drop,
Expand Down Expand Up @@ -50,6 +51,7 @@
from narwhals.dataframe import DataFrame, LazyFrame
from narwhals.dtypes import DType
from narwhals.typing import (
IntoNullableSchema,
IntoSchema,
JoinStrategy,
MultiColSelector,
Expand Down Expand Up @@ -317,14 +319,16 @@ def from_dict(
/,
*,
context: _LimitedContext,
schema: IntoSchema | Mapping[str, DType | None] | None,
schema: IntoSchema | IntoNullableSchema | None = None,
) -> Self:
from narwhals._utils import NullableSchema

pl_schema = (
{
key: narwhals_to_native_dtype(dtype, context._version)
if dtype is not None
else None
for (key, dtype) in schema.items()
for (key, dtype) in NullableSchema(schema).items()
}
if schema
else None
Expand All @@ -338,14 +342,16 @@ def from_dicts(
/,
*,
context: _LimitedContext,
schema: IntoSchema | Mapping[str, DType | None] | None,
schema: IntoSchema | IntoNullableSchema | None = None,
) -> Self:
from narwhals._utils import NullableSchema

pl_schema = (
{
key: narwhals_to_native_dtype(dtype, context._version)
if dtype is not None
else None
for (key, dtype) in schema.items()
for (key, dtype) in NullableSchema(schema).items()
}
if schema
else None
Expand Down Expand Up @@ -378,9 +384,9 @@ def from_numpy(
from narwhals.schema import Schema

pl_schema = (
Schema(schema).to_polars()
if isinstance(schema, (Mapping, Schema))
else schema
schema
if is_sequence_of(schema, str) or schema is None
else Schema(schema).to_polars()
)
return cls.from_native(pl.from_numpy(data, pl_schema), context=context)

Expand Down
22 changes: 22 additions & 0 deletions narwhals/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import re
import sys
from collections import OrderedDict
from collections.abc import Collection, Container, Iterable, Iterator, Mapping, Sequence
from datetime import timezone
from enum import Enum, auto
Expand Down Expand Up @@ -111,13 +112,17 @@
)
from narwhals.dataframe import DataFrame, LazyFrame
from narwhals.dtypes import DType
from narwhals.schema import Schema
from narwhals.series import Series
from narwhals.typing import (
CompliantDataFrame,
CompliantLazyFrame,
CompliantSeries,
DTypes,
FileSource,
IntoDType,
IntoNullableSchema,
IntoSchema,
IntoSeriesT,
MultiIndexSelector,
SingleIndexSelector,
Expand Down Expand Up @@ -2107,3 +2112,20 @@ def extend_bool(
Stolen from https://github.com/pola-rs/polars/blob/b8bfb07a4a37a8d449d6d1841e345817431142df/py-polars/polars/_utils/various.py#L580-L594
"""
return (value,) * n_match if isinstance(value, bool) else tuple(value)


class NullableSchema(OrderedDict[str, "IntoDType | None"]):
Copy link
Member Author

@FBruzzesi FBruzzesi Oct 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason for this class is mainly twofold:

  • Use as a utility to convert a Sequence[tuple[str, DType {|None}]] into a mapping. Hence make it easy to use the same API (key, dtype in obj.items()).
  • Easily set a flag to know if any value passed is None

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The name Nullable* is making me think this is related to #3176 (comment) again 🫣

I'm not opposed to having classes to make our internal API cleaner btw 👍

Copy link
Member Author

@FBruzzesi FBruzzesi Nov 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The name Nullable* is making me think this is related to #3176 (comment) again 🫣

Not sure I see how it's related. Passing None seems more like to be a free card, "live and let live" kind of behavior.

I'm not opposed to having classes to make our internal API cleaner btw 👍

Any preference in having it prefixed by _?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any preference in having it prefixed by _?

It is already in _utils, so no need for a prefix 🙂

The current _ names are an artifact from when we had everything in utils.py

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I see how it's related. Passing None seems more like to be a free card, "live and let live" kind of behavior.

I'll try to give a more complete explanation later, but I was hinting at Nullable, Null, None being a bit overloaded.
Linking to (#3176 (comment)) was supposed to show that I made this mistake already 😂

def __init__(self, schema: IntoSchema | IntoNullableSchema | None = None) -> None:
schema = schema or {}
super().__init__(schema)
self.is_nullable = None in self.values()

def to_schema(self) -> Schema:
"""Convert to `Schema`; raises `AssertionError` if any dtype is `None` (no values are filtered out)."""
from narwhals.schema import Schema

if self.is_nullable: # pragma: no cover
msg = "Cannot convert nullable mapping into `Schema`"
raise AssertionError(msg)

return Schema(self.items()) # type: ignore[arg-type]
13 changes: 10 additions & 3 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,14 @@
from narwhals._expression_parsing import ExprMetadata
from narwhals._translate import IntoArrowTable
from narwhals._typing import EagerAllowed, IntoBackend, LazyAllowed, Polars
from narwhals.dtypes import DType
from narwhals.group_by import GroupBy, LazyGroupBy
from narwhals.typing import (
AsofJoinStrategy,
IntoDataFrame,
IntoExpr,
IntoFrame,
IntoLazyFrame,
IntoNullableSchema,
IntoSchema,
JoinStrategy,
MultiColSelector as _MultiColSelector,
Expand Down Expand Up @@ -559,7 +559,7 @@ def from_arrow(
def from_dict(
cls,
data: Mapping[str, Any],
schema: IntoSchema | Mapping[str, DType | None] | None = None,
schema: IntoSchema | IntoNullableSchema | None = None,
*,
backend: IntoBackend[EagerAllowed] | None = None,
) -> DataFrame[Any]:
Expand Down Expand Up @@ -601,8 +601,15 @@ def from_dict(
| 1 2 4 |
└──────────────────┘
"""
from narwhals._utils import NullableSchema

if backend is None:
data, backend = _from_dict_no_backend(data)
if (schema and data) and (
diff := set(NullableSchema(schema).keys()).symmetric_difference(data.keys())
):
msg = f"Keys in `schema` and `data` are expected to match, found unmatched keys: {diff}"
raise InvalidOperationError(msg)
implementation = Implementation.from_backend(backend)
if is_eager_allowed(implementation):
ns = cls._version.namespace.from_backend(implementation).compliant
Expand All @@ -620,7 +627,7 @@ def from_dict(
def from_dicts(
cls,
data: Sequence[Mapping[str, Any]],
schema: IntoSchema | Mapping[str, DType | None] | None = None,
schema: IntoSchema | IntoNullableSchema | None = None,
*,
backend: IntoBackend[EagerAllowed],
) -> DataFrame[Any]:
Expand Down
Loading
Loading