Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
3b49eff
WIP: Add relaxed versions for all but Dask
FBruzzesi Jan 11, 2026
336f652
WIP: Add unit tests
FBruzzesi Jan 11, 2026
7d4cb37
fixup col name and pyarrow
FBruzzesi Jan 11, 2026
0f17849
minor standardization
FBruzzesi Jan 11, 2026
cebeeda
pandas-like promote_dtype_backend
FBruzzesi Jan 11, 2026
942af32
dask and tests
FBruzzesi Jan 11, 2026
9743d24
add to_supertype coverage
FBruzzesi Jan 11, 2026
1aa2232
Merge branch 'dtypes/supertyping' into feat/supertyping-relaxed-concat
FBruzzesi Jan 11, 2026
3328f2f
skip ibis diagonal
FBruzzesi Jan 11, 2026
23d0cd4
fix(typing): Make `pyright` happier
dangotbanned Jan 11, 2026
e0ce9eb
fix(typing): Pacify `mypy` for `pandas_like`
dangotbanned Jan 11, 2026
4b484f4
wow that was a useless error message!
dangotbanned Jan 11, 2026
683c835
fix(typing): Tell `mypy` we have a wider type than the first assignment
dangotbanned Jan 11, 2026
5bffccd
perf: Avoid unnecessary `lambda`s
dangotbanned Jan 11, 2026
8fabb13
perf: Use a generator instead of intermediate `dict`
dangotbanned Jan 12, 2026
c658320
perf: Optimize, rename `promote_dtype_backends`
dangotbanned Jan 14, 2026
909f06b
ibis diagonal relaxed
FBruzzesi Jan 20, 2026
c832b72
combine_schemas -> merge_schemas
FBruzzesi Jan 20, 2026
41f8679
merge head
FBruzzesi Jan 30, 2026
4a1b946
preserve unknown for lazy backends
FBruzzesi Jan 30, 2026
cdae5c8
merge head and solve conflicts
FBruzzesi Jan 31, 2026
b205ddb
preserve original dtype if not supported by narwhals
FBruzzesi Jan 31, 2026
61205dd
require pyarrow 19
FBruzzesi Jan 31, 2026
b3576dc
add reason
FBruzzesi Jan 31, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 44 additions & 2 deletions narwhals/_arrow/namespace.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import operator
from functools import reduce
from functools import partial, reduce
from itertools import chain
from typing import TYPE_CHECKING, Literal

Expand All @@ -18,7 +18,8 @@
combine_alias_output_names,
combine_evaluate_output_names,
)
from narwhals._utils import Implementation
from narwhals._utils import Implementation, safe_cast
from narwhals.schema import Schema, merge_schemas, to_supertype

if TYPE_CHECKING:
from collections.abc import Iterator, Sequence
Expand Down Expand Up @@ -176,6 +177,32 @@ def _concat_diagonal(self, dfs: Sequence[pa.Table], /) -> pa.Table:
return pa.concat_tables(dfs, promote_options="default")
return pa.concat_tables(dfs, promote=True) # pragma: no cover

def _concat_diagonal_relaxed(self, dfs: Sequence[pa.Table], /) -> pa.Table:
    """Diagonally concatenate tables after casting them to a merged schema.

    The schema of every table is converted with `Schema.from_arrow` and the
    results are folded together with `merge_schemas`. Each table is then
    re-selected through `safe_cast`, restricted to the columns it actually
    contains, before the casted tables are handed to `_concat_diagonal`.

    Arguments:
        dfs: Native pyarrow tables to concatenate.

    Returns:
        The table produced by `_concat_diagonal` on the casted inputs.
    """
    arrow_schemas = tuple(tbl.schema for tbl in dfs)
    merged = reduce(merge_schemas, map(Schema.from_arrow, arrow_schemas))
    wrap = partial(
        self._dataframe,
        version=self._version,
        validate_backend_version=False,
        validate_column_names=False,
    )
    casted = []
    for table, arrow_schema in zip(dfs, arrow_schemas):
        # Only cast the columns this table owns; absent columns are left
        # for `_concat_diagonal` to deal with.
        present = arrow_schema.names
        target = {name: dtype for name, dtype in merged.items() if name in present}
        casted.append(wrap(table).select(*safe_cast(self, target)).native)
    return self._concat_diagonal(tuple(casted))

def _concat_horizontal(self, dfs: Sequence[pa.Table], /) -> pa.Table:
names = list(chain.from_iterable(df.column_names for df in dfs))
arrays = tuple(chain.from_iterable(df.itercolumns() for df in dfs))
Expand All @@ -194,6 +221,21 @@ def _concat_vertical(self, dfs: Sequence[pa.Table], /) -> pa.Table:
raise TypeError(msg)
return pa.concat_tables(dfs)

def _concat_vertical_relaxed(self, dfs: Sequence[pa.Table], /) -> pa.Table:
    """Vertically concatenate tables after casting them to a common schema.

    The schema of every table is converted with `Schema.from_arrow` and the
    results are folded together with `to_supertype`. Every table is then
    re-selected through `safe_cast` against that single schema and the
    results are passed to `pa.concat_tables`.

    Arguments:
        dfs: Native pyarrow tables to concatenate.

    Returns:
        The concatenated pyarrow table.
    """
    target = reduce(to_supertype, (Schema.from_arrow(table.schema) for table in dfs))
    wrap = partial(
        self._dataframe,
        version=self._version,
        validate_backend_version=False,
        validate_column_names=False,
    )

    def recast(table: pa.Table) -> pa.Table:
        # Round-trip through the compliant frame so `safe_cast` expressions apply.
        return wrap(table).select(*safe_cast(self, target)).native

    return pa.concat_tables(map(recast, dfs))

@property
def selectors(self) -> ArrowSelectorNamespace:
return ArrowSelectorNamespace.from_namespace(self)
Expand Down
10 changes: 10 additions & 0 deletions narwhals/_compliant/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,10 +273,16 @@ def from_numpy(
return self._series.from_numpy(data, context=self)

def _concat_diagonal(self, dfs: Sequence[NativeFrameT], /) -> NativeFrameT: ...
# Stub (body is `...`): backend hook that `concat` calls when
# `how == "diagonal_relaxed"`; takes the native frames and returns one native frame.
def _concat_diagonal_relaxed(
    self, dfs: Sequence[NativeFrameT], /
) -> NativeFrameT: ...
def _concat_horizontal(
self, dfs: Sequence[NativeFrameT | Any], /
) -> NativeFrameT: ...
def _concat_vertical(self, dfs: Sequence[NativeFrameT], /) -> NativeFrameT: ...
# Stub (body is `...`): backend hook that `concat` calls when
# `how == "vertical_relaxed"`; takes the native frames and returns one native frame.
def _concat_vertical_relaxed(
    self, dfs: Sequence[NativeFrameT], /
) -> NativeFrameT: ...
def concat(
self, items: Iterable[EagerDataFrameT], *, how: ConcatMethod
) -> EagerDataFrameT:
Expand All @@ -285,8 +291,12 @@ def concat(
native = self._concat_horizontal(dfs)
elif how == "vertical":
native = self._concat_vertical(dfs)
elif how == "vertical_relaxed":
native = self._concat_vertical_relaxed(dfs)
elif how == "diagonal":
native = self._concat_diagonal(dfs)
elif how == "diagonal_relaxed":
native = self._concat_diagonal_relaxed(dfs)
else: # pragma: no cover
raise NotImplementedError
return self._dataframe.from_native(native, context=self)
25 changes: 21 additions & 4 deletions narwhals/_dask/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@
combine_alias_output_names,
combine_evaluate_output_names,
)
from narwhals._pandas_like.utils import promote_dtype_backends
from narwhals._utils import Implementation, is_nested_literal, zip_strict
from narwhals.schema import Schema, merge_schemas, to_supertype

if TYPE_CHECKING:
from collections.abc import Iterable, Iterator
Expand Down Expand Up @@ -143,10 +145,7 @@ def func(df: DaskLazyFrame) -> list[dx.Series]:
def concat(
self, items: Iterable[DaskLazyFrame], *, how: ConcatMethod
) -> DaskLazyFrame:
if not items:
msg = "No items to concatenate" # pragma: no cover
raise AssertionError(msg)
dfs = [i._native_frame for i in items]
dfs = [item.native for item in items]
cols_0 = dfs[0].columns
if how == "vertical":
for i, df in enumerate(dfs[1:], start=1):
Expand All @@ -167,6 +166,24 @@ def concat(
return DaskLazyFrame(
dd.concat(dfs, axis=0, join="outer"), version=self._version
)
if how == "vertical_relaxed":
schemas = tuple(df.dtypes.to_dict() for df in dfs)
out_schema = reduce(
to_supertype, (Schema.from_pandas_like(schema) for schema in schemas)
).to_pandas(promote_dtype_backends(schemas, self._implementation))

to_concat = [df.astype(out_schema) for df in dfs]
return DaskLazyFrame(
dd.concat(to_concat, axis=0, join="inner"), version=self._version
)
if how == "diagonal_relaxed":
schemas = tuple(df.dtypes.to_dict() for df in dfs)
out_schema = reduce(
merge_schemas, (Schema.from_pandas_like(schema) for schema in schemas)
).to_pandas(promote_dtype_backends(schemas, self._implementation))

native_res = dd.concat(dfs, axis=0, join="outer").astype(out_schema)
return DaskLazyFrame(native_res, version=self._version)

raise NotImplementedError

Expand Down
59 changes: 45 additions & 14 deletions narwhals/_duckdb/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import TYPE_CHECKING, Any

import duckdb
from duckdb import CoalesceOperator, Expression
from duckdb import CoalesceOperator, DuckDBPyRelation, Expression

from narwhals._duckdb.dataframe import DuckDBLazyFrame
from narwhals._duckdb.expr import DuckDBExpr
Expand All @@ -26,13 +26,12 @@
combine_evaluate_output_names,
)
from narwhals._sql.namespace import SQLNamespace
from narwhals._utils import Implementation
from narwhals._utils import Implementation, safe_cast
from narwhals.schema import Schema, merge_schemas, to_supertype

if TYPE_CHECKING:
from collections.abc import Iterable

from duckdb import DuckDBPyRelation # noqa: F401

from narwhals._compliant.window import WindowInputs
from narwhals._utils import Version
from narwhals.typing import ConcatMethod, IntoDType, PythonLiteral
Expand Down Expand Up @@ -82,23 +81,55 @@ def _coalesce(self, *exprs: Expression) -> Expression:
def concat(
self, items: Iterable[DuckDBLazyFrame], *, how: ConcatMethod
) -> DuckDBLazyFrame:
native_items = [item._native_frame for item in items]
items = list(items)
items = tuple(items)
first = items[0]
schema = first.schema
if how == "vertical" and not all(x.schema == schema for x in items[1:]):
msg = "inputs should all have the same schema"
raise TypeError(msg)

if how == "vertical":
schema = first.schema
if not all(x.schema == schema for x in items[1:]):
msg = "inputs should all have the same schema"
raise TypeError(msg)

res = reduce(DuckDBPyRelation.union, (item.native for item in items))
return first._with_native(res)

if how == "vertical_relaxed":
schemas: Iterable[Schema] = (Schema(df.collect_schema()) for df in items)
out_schema = reduce(to_supertype, schemas)
native_items = (
item.select(*safe_cast(self, out_schema)).native for item in items
)
res = reduce(DuckDBPyRelation.union, native_items)
return first._with_native(res)

if how == "diagonal":
res = first.native
for _item in native_items[1:]:
res, *others = (item.native for item in items)
for _item in others:
# TODO(unassigned): use relational API when available https://github.com/duckdb/duckdb/discussions/16996
res = duckdb.sql("""
from res select * union all by name from _item select *
""")
return first._with_native(res)
res = reduce(lambda x, y: x.union(y), native_items)
return first._with_native(res)

if how == "diagonal_relaxed":
schemas = [Schema(df.collect_schema()) for df in items]
out_schema = reduce(merge_schemas, schemas)
native_items = (
item.select(
*(
self.col(name)
if name in schema
else self.lit(None, dtype=dtype).alias(name)
for name, dtype in out_schema.items()
)
)
.select(*safe_cast(self, out_schema))
.native
for item, schema in zip(items, schemas)
)
res = reduce(DuckDBPyRelation.union, native_items)
return first._with_native(res)
raise NotImplementedError

def concat_str(
self, *exprs: DuckDBExpr, separator: str, ignore_nulls: bool
Expand Down
13 changes: 10 additions & 3 deletions narwhals/_ibis/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
from narwhals._ibis.selectors import IbisSelectorNamespace
from narwhals._ibis.utils import function, lit, narwhals_to_native_dtype
from narwhals._sql.namespace import SQLNamespace
from narwhals._utils import Implementation
from narwhals._utils import Implementation, safe_cast
from narwhals.schema import Schema, to_supertype

if TYPE_CHECKING:
from collections.abc import Iterable, Sequence
Expand Down Expand Up @@ -68,8 +69,13 @@ def concat(
self, items: Iterable[IbisLazyFrame], *, how: ConcatMethod
) -> IbisLazyFrame:
frames: Sequence[IbisLazyFrame] = tuple(items)
if how == "diagonal":
if how.startswith("diagonal"):
frames = self.align_diagonal(frames)

if how.endswith("relaxed"):
schemas = (Schema(frame.collect_schema()) for frame in frames)
out_schema = reduce(to_supertype, schemas)
frames = [frame.select(*safe_cast(self, out_schema)) for frame in frames]
try:
result = ibis.union(*(lf.native for lf in frames))
except ibis.IbisError:
Expand All @@ -78,7 +84,8 @@ def concat(
msg = "inputs should all have the same schema"
raise TypeError(msg) from None
raise
return frames[0]._with_native(result)
else:
return self._lazyframe.from_native(result, context=self)

def concat_str(
self, *exprs: IbisExpr, separator: str, ignore_nulls: bool
Expand Down
38 changes: 37 additions & 1 deletion narwhals/_pandas_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,15 @@
from narwhals._pandas_like.selectors import PandasSelectorNamespace
from narwhals._pandas_like.series import PandasLikeSeries
from narwhals._pandas_like.typing import NativeDataFrameT, NativeSeriesT
from narwhals._pandas_like.utils import is_non_nullable_boolean
from narwhals._pandas_like.utils import (
cast_native,
is_non_nullable_boolean,
iter_cast_native,
native_schema,
promote_dtype_backends,
)
from narwhals._utils import zip_strict
from narwhals.schema import Schema, merge_schemas, to_supertype

if TYPE_CHECKING:
from collections.abc import Iterable, Sequence
Expand Down Expand Up @@ -286,6 +293,21 @@ def _concat_diagonal(self, dfs: Sequence[NativeDataFrameT], /) -> NativeDataFram
return self._concat(dfs, axis=VERTICAL, copy=False)
return self._concat(dfs, axis=VERTICAL)

def _concat_diagonal_relaxed(
    self, dfs: Sequence[NativeDataFrameT], /
) -> NativeDataFrameT:
    """Diagonally concatenate frames, then cast the result to a merged schema.

    The per-frame native schemas are converted with `Schema.from_pandas_like`
    and folded together with `merge_schemas`; the merged schema is turned back
    into pandas dtypes via `to_pandas`, using the backends returned by
    `promote_dtype_backends`. The frames are concatenated first and the
    combined frame is cast afterwards with `cast_native`.

    Arguments:
        dfs: Native pandas-like dataframes to concatenate.

    Returns:
        The concatenated native dataframe, cast to the merged schema.
    """
    per_frame = tuple(map(native_schema, dfs))
    merged = reduce(merge_schemas, map(Schema.from_pandas_like, per_frame))
    backends = promote_dtype_backends(per_frame, self._implementation)
    target = merged.to_pandas(backends)

    # `copy=False` is only passed for pandas older than 3
    # (NOTE(review): presumably the keyword is unwanted elsewhere - confirm).
    if self._implementation.is_pandas() and self._backend_version < (3,):
        stacked = self._concat(dfs, axis=VERTICAL, copy=False)
    else:
        stacked = self._concat(dfs, axis=VERTICAL)
    return cast_native(stacked, target)

def _concat_horizontal(
self, dfs: Sequence[NativeDataFrameT | NativeSeriesT], /
) -> NativeDataFrameT:
Expand Down Expand Up @@ -318,6 +340,20 @@ def _concat_vertical(self, dfs: Sequence[NativeDataFrameT], /) -> NativeDataFram
return self._concat(dfs, axis=VERTICAL, copy=False)
return self._concat(dfs, axis=VERTICAL)

def _concat_vertical_relaxed(
    self, dfs: Sequence[NativeDataFrameT], /
) -> NativeDataFrameT:
    """Vertically concatenate frames after casting them to a common schema.

    The per-frame native schemas are converted with `Schema.from_pandas_like`
    and folded together with `to_supertype`; the result is turned back into
    pandas dtypes via `to_pandas`, using the backends returned by
    `promote_dtype_backends`. Unlike the diagonal variant, the frames are
    cast (through `iter_cast_native`) before they are concatenated.

    Arguments:
        dfs: Native pandas-like dataframes to concatenate.

    Returns:
        The concatenated native dataframe.
    """
    per_frame = tuple(map(native_schema, dfs))
    unified = reduce(to_supertype, map(Schema.from_pandas_like, per_frame))
    target = unified.to_pandas(promote_dtype_backends(per_frame, self._implementation))

    # `copy=False` is only passed for pandas older than 3
    # (NOTE(review): presumably the keyword is unwanted elsewhere - confirm).
    legacy_pandas = self._implementation.is_pandas() and self._backend_version < (3,)
    casted = iter_cast_native(dfs, target)
    if legacy_pandas:
        return self._concat(casted, axis=VERTICAL, copy=False)
    return self._concat(casted, axis=VERTICAL)

def concat_str(
self, *exprs: PandasLikeExpr, separator: str, ignore_nulls: bool
) -> PandasLikeExpr:
Expand Down
Loading
Loading