narwhals-dev · FBruzzesi · Oct 31, 2025 · Oct 31, 2025 · Oct 31, 2025 · Nov 1, 2025
diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py
@@ -20,6 +20,7 @@
     check_column_names_are_unique,
     convert_str_slice_to_int_slice,
     generate_temporary_column_name,
+    is_sequence_of,
     not_implemented,
     parse_columns_to_drop,
     scale_bytes,
@@ -53,6 +54,7 @@
     from narwhals._utils import Version, _LimitedContext
     from narwhals.dtypes import DType
     from narwhals.typing import (
+        IntoNullableSchema,
         IntoSchema,
         JoinStrategy,
         SizedMultiIndexSelector,
@@ -119,36 +121,39 @@ def from_dict(
         /,
         *,
         context: _LimitedContext,
-        schema: IntoSchema | Mapping[str, DType | None] | None,
+        schema: IntoSchema | IntoNullableSchema | None = None,
     ) -> Self:
+        from narwhals._utils import NullableSchema
+
         if not schema and not data:
             return cls.from_native(pa.table({}), context=context)
         if not schema:
             return cls.from_native(pa.table(data), context=context)  # type: ignore[arg-type]
-        if not any(dtype is None for dtype in schema.values()):
-            from narwhals.schema import Schema
+        nullable_schema = NullableSchema(schema)
+
+        if nullable_schema.is_nullable:
+            if context._implementation._backend_version() < (14,):
+                msg = "Passing `None` dtype in `from_dict` requires PyArrow>=14"
+                raise NotImplementedError(msg)
+            res = pa.table(
+                {
+                    name: pa.chunked_array(  # type: ignore[misc]
+                        [data[name] if data else []],
+                        type=narwhals_to_native_dtype(nw_dtype, version=context._version)
+                        if nw_dtype is not None
+                        else None,
+                    )
+                    for name, nw_dtype in nullable_schema.items()
+                }
+            )
+            return cls.from_native(pa.table(res), context=context)
 
-            pa_schema = Schema(cast("IntoSchema", schema)).to_arrow()
-            if pa_schema and not data:
-                native = pa_schema.empty_table()
-            else:
-                native = pa.Table.from_pydict(data, schema=pa_schema)
-            return cls.from_native(native, context=context)
-        if context._implementation._backend_version() < (14,):
-            msg = "Passing `None` dtype in `from_dict` requires PyArrow>=14"
-            raise NotImplementedError(msg)
-        res = pa.table(
-            {
-                name: pa.chunked_array(  # type: ignore[misc]
-                    [data[name] if data else []],
-                    type=narwhals_to_native_dtype(nw_dtype, version=context._version)
-                    if nw_dtype is not None
-                    else None,
-                )
-                for name, nw_dtype in schema.items()
-            }
-        )
-        return cls.from_native(pa.table(res), context=context)
+        pa_schema = nullable_schema.to_schema().to_arrow()
+        if pa_schema and not data:
+            native = pa_schema.empty_table()
+        else:
+            native = pa.Table.from_pydict(data, schema=pa_schema)
+        return cls.from_native(native, context=context)
 
     @classmethod
     def from_dicts(
@@ -157,17 +162,15 @@ def from_dicts(
         /,
         *,
         context: _LimitedContext,
-        schema: IntoSchema | Mapping[str, DType | None] | None,
+        schema: IntoSchema | IntoNullableSchema | None = None,
     ) -> Self:
-        from narwhals.schema import Schema
+        from narwhals._utils import NullableSchema
 
-        if schema and any(dtype is None for dtype in schema.values()):
+        if schema and (nullable_schema := NullableSchema(schema)).is_nullable:
             msg = "`from_dicts` with `schema` where any dtype is `None` is not supported for PyArrow."
             raise NotImplementedError(msg)
         pa_schema = (
-            Schema(cast("IntoSchema", schema)).to_arrow()
-            if schema is not None
-            else schema
+            nullable_schema.to_schema().to_arrow() if schema is not None else schema
         )
         if pa_schema and not data:
             native = pa_schema.empty_table()
@@ -195,10 +198,10 @@ def from_numpy(
         from narwhals.schema import Schema
 
         arrays = [pa.array(val) for val in data.T]
-        if isinstance(schema, (Mapping, Schema)):
-            native = pa.Table.from_arrays(arrays, schema=Schema(schema).to_arrow())
-        else:
+        if is_sequence_of(schema, str) or schema is None:
             native = pa.Table.from_arrays(arrays, cls._numpy_column_names(data, schema))
+        else:
+            native = pa.Table.from_arrays(arrays, schema=Schema(schema).to_arrow())
         return cls.from_native(native, context=context)
 
     def __narwhals_namespace__(self) -> ArrowNamespace:

diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py
@@ -61,6 +61,7 @@
     from narwhals.exceptions import ColumnNotFoundError
     from narwhals.typing import (
         AsofJoinStrategy,
+        IntoNullableSchema,
         IntoSchema,
         JoinStrategy,
         MultiColSelector,
@@ -191,7 +192,7 @@ def from_dict(
         /,
         *,
         context: CompliantNamespaceAny,
-        schema: IntoSchema | Mapping[str, DType | None] | None,
+        schema: IntoSchema | IntoNullableSchema | None,
     ) -> Self: ...
     @classmethod
     def from_dicts(
@@ -200,7 +201,7 @@ def from_dicts(
         /,
         *,
         context: _LimitedContext,
-        schema: IntoSchema | Mapping[str, DType | None] | None,
+        schema: IntoSchema | IntoNullableSchema | None,
     ) -> Self: ...
     @classmethod
     def from_numpy(

diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-from collections.abc import Iterable, Iterator, Mapping, Sequence
 from itertools import chain, product
 from typing import TYPE_CHECKING, Any, Callable, Literal, cast, overload
 
@@ -28,6 +27,7 @@
     check_column_names_are_unique,
     exclude_column_names,
     generate_temporary_column_name,
+    is_sequence_of,
     parse_columns_to_drop,
     scale_bytes,
     zip_strict,
@@ -36,6 +36,7 @@
 from narwhals.exceptions import InvalidOperationError, ShapeError
 
 if TYPE_CHECKING:
+    from collections.abc import Iterable, Iterator, Mapping, Sequence
     from io import BytesIO
     from pathlib import Path
     from types import ModuleType
@@ -56,6 +57,7 @@
     from narwhals.typing import (
         AsofJoinStrategy,
         DTypeBackend,
+        IntoNullableSchema,
         IntoSchema,
         JoinStrategy,
         PivotAgg,
@@ -148,8 +150,11 @@ def from_dict(
         /,
         *,
         context: _LimitedContext,
-        schema: IntoSchema | Mapping[str, DType | None] | None,
+        schema: IntoSchema | IntoNullableSchema | None = None,
     ) -> Self:
+        from narwhals._utils import NullableSchema
+
+        schema = NullableSchema(schema) if schema is not None else None
         implementation = context._implementation
         pdx = implementation.to_native_namespace()
         Series = cast("type[pd.Series[Any]]", pdx.Series)
@@ -196,8 +201,11 @@ def from_dicts(
         /,
         *,
         context: _LimitedContext,
-        schema: IntoSchema | Mapping[str, DType | None] | None,
+        schema: IntoSchema | IntoNullableSchema | None = None,
     ) -> Self:
+        from narwhals._utils import NullableSchema
+
+        schema = NullableSchema(schema) if schema is not None else None
         implementation = context._implementation
         ns = implementation.to_native_namespace()
         DataFrame = cast("type[pd.DataFrame]", ns.DataFrame)
@@ -250,16 +258,15 @@ def from_numpy(
 
         implementation = context._implementation
         DataFrame: Constructor = implementation.to_native_namespace().DataFrame
-        if isinstance(schema, (Mapping, Schema)):
+        if is_sequence_of(schema, str) or schema is None:
+            native = DataFrame(data, columns=cls._numpy_column_names(data, schema))
+        else:
+            schema = Schema(schema)
             it: Iterable[DTypeBackend] = (
                 get_dtype_backend(native_type, implementation)
                 for native_type in schema.values()
             )
-            native = DataFrame(data, columns=schema.keys()).astype(
-                Schema(schema).to_pandas(it)
-            )
-        else:
-            native = DataFrame(data, columns=cls._numpy_column_names(data, schema))
+            native = DataFrame(data, columns=schema.keys()).astype(schema.to_pandas(it))
         return cls.from_native(native, context=context)
 
     def __narwhals_dataframe__(self) -> Self:

diff --git a/narwhals/_polars/dataframe.py b/narwhals/_polars/dataframe.py
@@ -23,6 +23,7 @@
     is_index_selector,
     is_range,
     is_sequence_like,
+    is_sequence_of,
     is_slice_index,
     is_slice_none,
     parse_columns_to_drop,
@@ -50,6 +51,7 @@
     from narwhals.dataframe import DataFrame, LazyFrame
     from narwhals.dtypes import DType
     from narwhals.typing import (
+        IntoNullableSchema,
         IntoSchema,
         JoinStrategy,
         MultiColSelector,
@@ -317,14 +319,16 @@ def from_dict(
         /,
         *,
         context: _LimitedContext,
-        schema: IntoSchema | Mapping[str, DType | None] | None,
+        schema: IntoSchema | IntoNullableSchema | None = None,
     ) -> Self:
+        from narwhals._utils import NullableSchema
+
         pl_schema = (
             {
                 key: narwhals_to_native_dtype(dtype, context._version)
                 if dtype is not None
                 else None
-                for (key, dtype) in schema.items()
+                for (key, dtype) in NullableSchema(schema).items()
             }
             if schema
             else None
@@ -338,14 +342,16 @@ def from_dicts(
         /,
         *,
         context: _LimitedContext,
-        schema: IntoSchema | Mapping[str, DType | None] | None,
+        schema: IntoSchema | IntoNullableSchema | None = None,
     ) -> Self:
+        from narwhals._utils import NullableSchema
+
         pl_schema = (
             {
                 key: narwhals_to_native_dtype(dtype, context._version)
                 if dtype is not None
                 else None
-                for (key, dtype) in schema.items()
+                for (key, dtype) in NullableSchema(schema).items()
             }
             if schema
             else None
@@ -378,9 +384,9 @@ def from_numpy(
         from narwhals.schema import Schema
 
         pl_schema = (
-            Schema(schema).to_polars()
-            if isinstance(schema, (Mapping, Schema))
-            else schema
+            schema
+            if is_sequence_of(schema, str) or schema is None
+            else Schema(schema).to_polars()
         )
         return cls.from_native(pl.from_numpy(data, pl_schema), context=context)
 

diff --git a/narwhals/_utils.py b/narwhals/_utils.py
@@ -3,6 +3,7 @@
 import os
 import re
 import sys
+from collections import OrderedDict
 from collections.abc import Collection, Container, Iterable, Iterator, Mapping, Sequence
 from datetime import timezone
 from enum import Enum, auto
@@ -111,13 +112,17 @@
     )
     from narwhals.dataframe import DataFrame, LazyFrame
     from narwhals.dtypes import DType
+    from narwhals.schema import Schema
     from narwhals.series import Series
     from narwhals.typing import (
         CompliantDataFrame,
         CompliantLazyFrame,
         CompliantSeries,
         DTypes,
         FileSource,
+        IntoDType,
+        IntoNullableSchema,
+        IntoSchema,
         IntoSeriesT,
         MultiIndexSelector,
         SingleIndexSelector,
@@ -2107,3 +2112,20 @@ def extend_bool(
     Stolen from https://github.com/pola-rs/polars/blob/b8bfb07a4a37a8d449d6d1841e345817431142df/py-polars/polars/_utils/various.py#L580-L594
     """
     return (value,) * n_match if isinstance(value, bool) else tuple(value)
+
+
+class NullableSchema(OrderedDict[str, "IntoDType | None"]):  # ordered column-name -> dtype mapping in which a dtype may be `None`
+    def __init__(self, schema: IntoSchema | IntoNullableSchema | None = None) -> None:
+        schema = schema or {}  # `None` (or any other falsy schema) is treated as an empty mapping
+        super().__init__(schema)
+        self.is_nullable = None in self.values()  # computed once here; not refreshed if the mapping is mutated afterwards
+
+    def to_schema(self) -> Schema:
+        """Convert to `Schema`; raises `AssertionError` if any dtype is `None` (no values are filtered out)."""
+        from narwhals.schema import Schema
+
+        if self.is_nullable:  # pragma: no cover
+            msg = "Cannot convert nullable mapping into `Schema`"
+            raise AssertionError(msg)
+
+        return Schema(self.items())  # type: ignore[arg-type]
diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py
@@ -72,14 +72,14 @@
     from narwhals._expression_parsing import ExprMetadata
     from narwhals._translate import IntoArrowTable
     from narwhals._typing import EagerAllowed, IntoBackend, LazyAllowed, Polars
-    from narwhals.dtypes import DType
     from narwhals.group_by import GroupBy, LazyGroupBy
     from narwhals.typing import (
         AsofJoinStrategy,
         IntoDataFrame,
         IntoExpr,
         IntoFrame,
         IntoLazyFrame,
+        IntoNullableSchema,
         IntoSchema,
         JoinStrategy,
         MultiColSelector as _MultiColSelector,
@@ -559,7 +559,7 @@ def from_arrow(
     def from_dict(
         cls,
         data: Mapping[str, Any],
-        schema: IntoSchema | Mapping[str, DType | None] | None = None,
+        schema: IntoSchema | IntoNullableSchema | None = None,
         *,
         backend: IntoBackend[EagerAllowed] | None = None,
     ) -> DataFrame[Any]:
@@ -601,8 +601,15 @@ def from_dict(
             |     1  2  4      |
             └──────────────────┘
         """
+        from narwhals._utils import NullableSchema
+
         if backend is None:
             data, backend = _from_dict_no_backend(data)
+        if (schema and data) and (
+            diff := set(NullableSchema(schema).keys()).symmetric_difference(data.keys())
+        ):
+            msg = f"Keys in `schema` and `data` are expected to match, found unmatched keys: {diff}"
+            raise InvalidOperationError(msg)
         implementation = Implementation.from_backend(backend)
         if is_eager_allowed(implementation):
             ns = cls._version.namespace.from_backend(implementation).compliant
@@ -620,7 +627,7 @@ def from_dict(
     def from_dicts(
         cls,
         data: Sequence[Mapping[str, Any]],
-        schema: IntoSchema | Mapping[str, DType | None] | None = None,
+        schema: IntoSchema | IntoNullableSchema | None = None,
         *,
         backend: IntoBackend[EagerAllowed],
     ) -> DataFrame[Any]: