Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 8 additions & 12 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@
from narwhals._typing import _EagerAllowedImpl, _LazyAllowedImpl
from narwhals._utils import Version, _LimitedContext
from narwhals.dtypes import DType
from narwhals.schema import Schema
from narwhals.typing import (
IntoSchema,
JoinStrategy,
SizedMultiIndexSelector,
SizedMultiNameSelector,
Expand Down Expand Up @@ -114,11 +114,9 @@ def from_dict(
/,
*,
context: _LimitedContext,
schema: IntoSchema | None,
schema: Schema | None,
) -> Self:
from narwhals.schema import Schema

pa_schema = Schema(schema).to_arrow() if schema is not None else schema
pa_schema = schema.to_arrow() if schema is not None else schema
if pa_schema and not data:
native = pa_schema.empty_table()
else:
Expand All @@ -132,11 +130,9 @@ def from_dicts(
/,
*,
context: _LimitedContext,
schema: IntoSchema | None,
schema: Schema | None,
) -> Self:
from narwhals.schema import Schema

pa_schema = Schema(schema).to_arrow() if schema is not None else schema
pa_schema = schema.to_arrow() if schema is not None else schema
if pa_schema and not data:
native = pa_schema.empty_table()
else:
Expand All @@ -158,13 +154,13 @@ def from_numpy(
/,
*,
context: _LimitedContext,
schema: IntoSchema | Sequence[str] | None,
schema: Schema | Sequence[str] | None,
) -> Self:
from narwhals.schema import Schema

arrays = [pa.array(val) for val in data.T]
if isinstance(schema, (Mapping, Schema)):
native = pa.Table.from_arrays(arrays, schema=Schema(schema).to_arrow())
if isinstance(schema, Schema):
native = pa.Table.from_arrays(arrays, schema=schema.to_arrow())
else:
native = pa.Table.from_arrays(arrays, cls._numpy_column_names(data, schema))
return cls.from_native(native, context=context)
Expand Down
8 changes: 4 additions & 4 deletions narwhals/_compliant/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@
from narwhals.dataframe import DataFrame
from narwhals.dtypes import DType
from narwhals.exceptions import ColumnNotFoundError
from narwhals.schema import Schema
from narwhals.typing import (
AsofJoinStrategy,
IntoSchema,
JoinStrategy,
MultiColSelector,
MultiIndexSelector,
Expand Down Expand Up @@ -190,7 +190,7 @@ def from_dict(
/,
*,
context: _LimitedContext,
schema: IntoSchema | None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that if every one of these constructors is going to now require Schema, then we shouldn't return a dict from:

  • CompliantFrame.schema (and friends)

I understand that these changes make things cleaner inside the methods, but the classes as a whole are now less ergonomic 🤔

Copy link
Member

@dangotbanned dangotbanned Oct 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm bringing this up since from the perspective of plugins, this is an API break. On second read, that was a bit strong 🫣

We've already established that this is expected for now, but I'd rather we get the consistency right in one swoop 🙂

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, that's fair! I will address the original issue and a bit more, but not too much else 😂

schema: Schema | None,
) -> Self: ...
@classmethod
def from_dicts(
Expand All @@ -199,7 +199,7 @@ def from_dicts(
/,
*,
context: _LimitedContext,
schema: IntoSchema | None,
schema: Schema | None,
) -> Self: ...
@classmethod
def from_numpy(
Expand All @@ -208,7 +208,7 @@ def from_numpy(
/,
*,
context: _LimitedContext,
schema: IntoSchema | Sequence[str] | None,
schema: Schema | Sequence[str] | None,
) -> Self: ...
def __array__(self, dtype: Any, *, copy: bool | None) -> _2DArray: ...
def __getitem__(
Expand Down
6 changes: 3 additions & 3 deletions narwhals/_compliant/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@

from narwhals._compliant.selectors import CompliantSelectorNamespace
from narwhals._utils import Implementation, Version
from narwhals.schema import Schema
from narwhals.typing import (
ConcatMethod,
Into1DArray,
IntoDType,
IntoSchema,
NonNestedLiteral,
_2DArray,
)
Expand Down Expand Up @@ -206,14 +206,14 @@ def from_numpy(self, data: Into1DArray, /, schema: None = ...) -> EagerSeriesT_c

@overload
def from_numpy(
self, data: _2DArray, /, schema: IntoSchema | Sequence[str] | None
self, data: _2DArray, /, schema: Schema | Sequence[str] | None
) -> EagerDataFrameT: ...

def from_numpy(
self,
data: Into1DArray | _2DArray,
/,
schema: IntoSchema | Sequence[str] | None = None,
schema: Schema | Sequence[str] | None = None,
) -> EagerDataFrameT | EagerSeriesT_co:
if is_numpy_array_2d(data):
return self._dataframe.from_numpy(data, schema=schema, context=self)
Expand Down
26 changes: 9 additions & 17 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

from collections.abc import Iterable, Iterator, Mapping, Sequence
from itertools import chain, product
from typing import TYPE_CHECKING, Any, Callable, Literal, cast, overload

Expand Down Expand Up @@ -33,8 +32,10 @@
)
from narwhals.dependencies import is_pandas_like_dataframe
from narwhals.exceptions import InvalidOperationError, ShapeError
from narwhals.schema import Schema

if TYPE_CHECKING:
from collections.abc import Iterable, Iterator, Mapping, Sequence
from io import BytesIO
from pathlib import Path
from types import ModuleType
Expand All @@ -55,7 +56,6 @@
from narwhals.typing import (
AsofJoinStrategy,
DTypeBackend,
IntoSchema,
JoinStrategy,
PivotAgg,
SizedMultiIndexSelector,
Expand Down Expand Up @@ -147,10 +147,8 @@ def from_dict(
/,
*,
context: _LimitedContext,
schema: IntoSchema | None,
schema: Schema | None,
) -> Self:
from narwhals.schema import Schema

implementation = context._implementation
ns = implementation.to_native_namespace()
Series = cast("type[pd.Series[Any]]", ns.Series)
Expand All @@ -175,7 +173,7 @@ def from_dict(
backend: Iterable[DTypeBackend] | None = None
if aligned_data:
backend = iter_dtype_backends(native.dtypes, implementation)
native = native.astype(Schema(schema).to_pandas(backend))
native = native.astype(schema.to_pandas(backend))
return cls.from_native(native, context=context)

@classmethod
Expand All @@ -185,10 +183,8 @@ def from_dicts(
/,
*,
context: _LimitedContext,
schema: IntoSchema | None,
schema: Schema | None,
) -> Self:
from narwhals.schema import Schema

implementation = context._implementation
ns = implementation.to_native_namespace()
DataFrame = cast("type[pd.DataFrame]", ns.DataFrame)
Expand All @@ -200,7 +196,7 @@ def from_dicts(
backend: Iterable[DTypeBackend] | None = None
if data:
backend = iter_dtype_backends(native.dtypes, implementation)
native = native.astype(Schema(schema).to_pandas(backend))
native = native.astype(schema.to_pandas(backend))
return cls.from_native(native, context=context)

@staticmethod
Expand All @@ -223,20 +219,16 @@ def from_numpy(
/,
*,
context: _LimitedContext,
schema: IntoSchema | Sequence[str] | None,
schema: Schema | Sequence[str] | None,
) -> Self:
from narwhals.schema import Schema

implementation = context._implementation
DataFrame: Constructor = implementation.to_native_namespace().DataFrame
if isinstance(schema, (Mapping, Schema)):
if isinstance(schema, Schema):
it: Iterable[DTypeBackend] = (
get_dtype_backend(native_type, implementation)
for native_type in schema.values()
)
native = DataFrame(data, columns=schema.keys()).astype(
Schema(schema).to_pandas(it)
)
native = DataFrame(data, columns=schema.keys()).astype(schema.to_pandas(it))
else:
native = DataFrame(data, columns=cls._numpy_column_names(data, schema))
return cls.from_native(native, context=context)
Expand Down
24 changes: 7 additions & 17 deletions narwhals/_polars/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
)
from narwhals.dependencies import is_numpy_array_1d
from narwhals.exceptions import ColumnNotFoundError
from narwhals.schema import Schema

if TYPE_CHECKING:
from collections.abc import Iterable
Expand All @@ -49,7 +50,6 @@
from narwhals.dataframe import DataFrame, LazyFrame
from narwhals.dtypes import DType
from narwhals.typing import (
IntoSchema,
JoinStrategy,
MultiColSelector,
MultiIndexSelector,
Expand Down Expand Up @@ -316,11 +316,9 @@ def from_dict(
/,
*,
context: _LimitedContext,
schema: IntoSchema | None,
schema: Schema | None,
) -> Self:
from narwhals.schema import Schema

pl_schema = Schema(schema).to_polars() if schema is not None else schema
pl_schema = schema.to_polars() if schema is not None else schema
return cls.from_native(pl.from_dict(data, pl_schema), context=context)

@classmethod
Expand All @@ -330,11 +328,9 @@ def from_dicts(
/,
*,
context: _LimitedContext,
schema: IntoSchema | None,
schema: Schema | None,
) -> Self:
from narwhals.schema import Schema

pl_schema = Schema(schema).to_polars() if schema is not None else schema
pl_schema = schema.to_polars() if schema is not None else schema
if not data:
native = pl.DataFrame(schema=pl_schema)
elif FROM_DICTS_ACCEPTS_MAPPINGS or isinstance(data[0], dict):
Expand All @@ -358,15 +354,9 @@ def from_numpy(
/,
*,
context: _LimitedContext, # NOTE: Maybe only `Implementation`?
schema: IntoSchema | Sequence[str] | None,
schema: Schema | Sequence[str] | None,
) -> Self:
from narwhals.schema import Schema

pl_schema = (
Schema(schema).to_polars()
if isinstance(schema, (Mapping, Schema))
else schema
)
pl_schema = schema.to_polars() if isinstance(schema, Schema) else schema
return cls.from_native(pl.from_numpy(data, pl_schema), context=context)

def to_narwhals(self) -> DataFrame[pl.DataFrame]:
Expand Down
7 changes: 4 additions & 3 deletions narwhals/_polars/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
from narwhals._polars.dataframe import Method, PolarsDataFrame, PolarsLazyFrame
from narwhals._polars.typing import FrameT
from narwhals._utils import Version, _LimitedContext
from narwhals.typing import Into1DArray, IntoDType, IntoSchema, TimeUnit, _2DArray
from narwhals.schema import Schema
from narwhals.typing import Into1DArray, IntoDType, TimeUnit, _2DArray


class PolarsNamespace:
Expand Down Expand Up @@ -97,14 +98,14 @@ def from_numpy(self, data: Into1DArray, /, schema: None = ...) -> PolarsSeries:

@overload
def from_numpy(
self, data: _2DArray, /, schema: IntoSchema | Sequence[str] | None
self, data: _2DArray, /, schema: Schema | Sequence[str] | None
) -> PolarsDataFrame: ...

def from_numpy(
self,
data: Into1DArray | _2DArray,
/,
schema: IntoSchema | Sequence[str] | None = None,
schema: Schema | Sequence[str] | None = None,
) -> PolarsDataFrame | PolarsSeries:
if is_numpy_array_2d(data):
return self._dataframe.from_numpy(data, schema=schema, context=self)
Expand Down
14 changes: 11 additions & 3 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
is_lazy_allowed,
is_list_of,
is_sequence_like,
is_sequence_of,
is_slice_none,
predicates_contains_list_of_bool,
qualified_type_name,
Expand Down Expand Up @@ -603,7 +604,8 @@ def from_dict(
implementation = Implementation.from_backend(backend)
if is_eager_allowed(implementation):
ns = cls._version.namespace.from_backend(implementation).compliant
compliant = ns._dataframe.from_dict(data, schema=schema, context=ns)
schema_ = Schema(schema) if schema is not None else None
compliant = ns._dataframe.from_dict(data, schema=schema_, context=ns)
return cls(compliant, level="full")
# NOTE: (#2786) needs resolving for extensions
msg = (
Expand Down Expand Up @@ -673,7 +675,8 @@ def from_dicts(
implementation = Implementation.from_backend(backend)
if is_eager_allowed(implementation):
ns = cls._version.namespace.from_backend(implementation).compliant
compliant = ns._dataframe.from_dicts(data, schema=schema, context=ns)
schema_ = Schema(schema) if schema is not None else None
compliant = ns._dataframe.from_dicts(data, schema=schema_, context=ns)
return cls(compliant, level="full")
# NOTE: (#2786) needs resolving for extensions
msg = (
Expand Down Expand Up @@ -745,8 +748,13 @@ def from_numpy(
raise TypeError(msg)
implementation = Implementation.from_backend(backend)
if is_eager_allowed(implementation):
schema_ = (
schema
if schema is None or is_sequence_of(schema, str)
else Schema(schema)
)
ns = cls._version.namespace.from_backend(implementation).compliant
return cls(ns.from_numpy(data, schema), level="full")
return cls(ns.from_numpy(data, schema_), level="full")
msg = (
f"{implementation} support in Narwhals is lazy-only, but `DataFrame.from_numpy` is an eager-only function.\n\n"
"Hint: you may want to use an eager backend and then call `.lazy`, e.g.:\n\n"
Expand Down
Loading
Loading