diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py index 389faed6e0..9b837c69c5 100644 --- a/narwhals/_dask/namespace.py +++ b/narwhals/_dask/namespace.py @@ -175,24 +175,6 @@ def concat( backend_version=self._backend_version, version=self._version, ) - if how == "horizontal": - all_column_names: list[str] = [ - column for frame in dfs for column in frame.columns - ] - if len(all_column_names) != len(set(all_column_names)): # pragma: no cover - duplicates = [ - i for i in all_column_names if all_column_names.count(i) > 1 - ] - msg = ( - f"Columns with name(s): {', '.join(duplicates)} " - "have more than one occurrence" - ) - raise AssertionError(msg) - return DaskLazyFrame( - dd.concat(dfs, axis=1, join="outer"), - backend_version=self._backend_version, - version=self._version, - ) if how == "diagonal": return DaskLazyFrame( dd.concat(dfs, axis=0, join="outer"), diff --git a/narwhals/_duckdb/namespace.py b/narwhals/_duckdb/namespace.py index b755be3723..d9a6ebac51 100644 --- a/narwhals/_duckdb/namespace.py +++ b/narwhals/_duckdb/namespace.py @@ -60,21 +60,16 @@ def _lazyframe(self) -> type[DuckDBLazyFrame]: return DuckDBLazyFrame def concat( - self, items: Iterable[DuckDBLazyFrame], *, how: ConcatMethod + self: Self, items: Iterable[DuckDBLazyFrame], *, how: ConcatMethod ) -> DuckDBLazyFrame: - if how == "horizontal": - msg = "horizontal concat not supported for duckdb. Please join instead" - raise TypeError(msg) - if how == "diagonal": - msg = "Not implemented yet" - raise NotImplementedError(msg) + native_items = [item._native_frame for item in items] items = list(items) first = items[0] schema = first.schema if how == "vertical" and not all(x.schema == schema for x in items[1:]): msg = "inputs should all have the same schema" raise TypeError(msg) - res = reduce(lambda x, y: x.union(y), (item._native_frame for item in items)) + res = reduce(lambda x, y: x.union(y), native_items) return first._with_native(res) def concat_str( diff --git a/narwhals/_spark_like/namespace.py b/narwhals/_spark_like/namespace.py index 6d97f0f670..667df59c44 100644 --- a/narwhals/_spark_like/namespace.py +++ b/narwhals/_spark_like/namespace.py @@ -192,13 +192,6 @@ def concat( self, items: Iterable[SparkLikeLazyFrame], *, how: ConcatMethod ) -> SparkLikeLazyFrame: dfs = [item._native_frame for item in items] - if how == "horizontal": - msg = ( - "Horizontal concatenation is not supported for LazyFrame backed by " - "a PySpark DataFrame." - ) - raise NotImplementedError(msg) - if how == "vertical": cols_0 = dfs[0].columns for i, df in enumerate(dfs[1:], start=1): diff --git a/narwhals/functions.py b/narwhals/functions.py index 8792844eba..a9b01a2e02 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -27,6 +27,7 @@ from narwhals.dependencies import is_numpy_array from narwhals.dependencies import is_numpy_array_2d from narwhals.dependencies import is_pyarrow_table +from narwhals.exceptions import InvalidOperationError from narwhals.expr import Expr from narwhals.series import Series from narwhals.translate import from_native @@ -79,12 +80,13 @@ def concat(items: Iterable[FrameT], *, how: ConcatMethod = "vertical") -> FrameT - vertical: Concatenate vertically. Column names must match. - horizontal: Concatenate horizontally. If lengths don't match, then - missing rows are filled with null values. + missing rows are filled with null values. This is only supported + when all inputs are (eager) DataFrames. - diagonal: Finds a union between the column schemas and fills missing column values with null. Returns: - A new DataFrame, Lazyframe resulting from the concatenation. + A new DataFrame or LazyFrame resulting from the concatenation. Raises: TypeError: The items to concatenate should either all be eager, or all lazy @@ -151,15 +153,23 @@ def concat(items: Iterable[FrameT], *, how: ConcatMethod = "vertical") -> FrameT |z: [[null,null],["x","y"]]| └──────────────────────────┘ """ - if how not in {"horizontal", "vertical", "diagonal"}: # pragma: no cover - msg = "Only vertical, horizontal and diagonal concatenations are supported." - raise NotImplementedError(msg) + from narwhals.dependencies import is_narwhals_lazyframe + if not items: - msg = "No items to concatenate" + msg = "No items to concatenate." raise ValueError(msg) items = list(items) validate_laziness(items) + if how not in {"horizontal", "vertical", "diagonal"}: # pragma: no cover + msg = "Only vertical, horizontal and diagonal concatenations are supported." + raise NotImplementedError(msg) first_item = items[0] + if is_narwhals_lazyframe(first_item) and how == "horizontal": + msg = ( + "Horizontal concatenation is not supported for LazyFrames.\n\n" + "Hint: you may want to use `join` instead." + ) + raise InvalidOperationError(msg) plx = first_item.__narwhals_namespace__() return first_item._with_compliant( plx.concat([df._compliant_frame for df in items], how=how), diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 01d3654630..92245b2671 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -2069,12 +2069,13 @@ def concat(items: Iterable[FrameT], *, how: ConcatMethod = "vertical") -> FrameT - vertical: Concatenate vertically. Column names must match. - horizontal: Concatenate horizontally. If lengths don't match, then - missing rows are filled with null values. + missing rows are filled with null values. This is only supported + when all inputs are (eager) DataFrames. - diagonal: Finds a union between the column schemas and fills missing column values with null. Returns: - A new DataFrame, Lazyframe resulting from the concatenation. + A new DataFrame or LazyFrame resulting from the concatenation. Raises: TypeError: The items to concatenate should either all be eager, or all lazy diff --git a/tests/frame/concat_test.py b/tests/frame/concat_test.py index bbdd306fe3..1e663212d8 100644 --- a/tests/frame/concat_test.py +++ b/tests/frame/concat_test.py @@ -1,22 +1,22 @@ from __future__ import annotations +import re + import pytest import narwhals.stable.v1 as nw +from narwhals.exceptions import InvalidOperationError from tests.utils import Constructor +from tests.utils import ConstructorEager from tests.utils import assert_equal_data -def test_concat_horizontal( - constructor: Constructor, request: pytest.FixtureRequest -) -> None: - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): - request.applymarker(pytest.mark.xfail) +def test_concat_horizontal(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]} - df_left = nw.from_native(constructor(data)).lazy() + df_left = nw.from_native(constructor_eager(data), eager_only=True) data_right = {"c": [6, 12, -1], "d": [0, -4, 2]} - df_right = nw.from_native(constructor(data_right)).lazy() + df_right = nw.from_native(constructor_eager(data_right), eager_only=True) result = nw.concat([df_left, df_right], how="horizontal") expected = { @@ -30,6 +30,9 @@ def test_concat_horizontal( with pytest.raises(ValueError, match="No items"): nw.concat([]) + pattern = re.compile(r"horizontal.+not supported.+lazyframe", re.IGNORECASE) + with pytest.raises(InvalidOperationError, match=pattern): + nw.concat([df_left.lazy()], how="horizontal") def test_concat_vertical(constructor: Constructor) -> None: diff --git a/tests/series_only/hist_test.py b/tests/series_only/hist_test.py index f28747ccde..8d8b30adb1 100644 --- a/tests/series_only/hist_test.py +++ b/tests/series_only/hist_test.py @@ -13,6 +13,11 @@ from tests.utils import ConstructorEager from tests.utils import assert_equal_data +xfail_hist = pytest.mark.xfail( + reason="https://github.com/narwhals-dev/narwhals/issues/2348", strict=False +) + + data = { "int": [0, 1, 2, 3, 4, 5, 6], } @@ -76,6 +81,7 @@ ] +@xfail_hist @pytest.mark.parametrize("params", bins_and_expected) @pytest.mark.parametrize("include_breakpoint", [True, False]) @pytest.mark.filterwarnings( @@ -161,6 +167,7 @@ def test_hist_bin( assert_equal_data(result, expected) +@xfail_hist @pytest.mark.parametrize("params", counts_and_expected) @pytest.mark.parametrize("include_breakpoint", [True, False]) @pytest.mark.filterwarnings( @@ -232,6 +239,7 @@ def test_hist_count( ) +@xfail_hist @pytest.mark.filterwarnings( "ignore:`Series.hist` is being called from the stable API although considered an unstable feature." ) @@ -268,6 +276,7 @@ def test_hist_count_no_spread( assert_equal_data(result, expected) +@xfail_hist @pytest.mark.filterwarnings( "ignore:`Series.hist` is being called from the stable API although considered an unstable feature." ) @@ -283,6 +292,7 @@ def test_hist_bin_and_bin_count() -> None: s.hist(bins=[1, 3], bin_count=4) +@xfail_hist @pytest.mark.filterwarnings( "ignore:`Series.hist` is being called from the stable API although considered an unstable feature." ) @@ -331,6 +341,7 @@ def test_hist_small_bins( s["values"].hist(bins=[1, 3], bin_count=4) +@xfail_hist @pytest.mark.filterwarnings( "ignore:`Series.hist` is being called from the stable API although considered an unstable feature." ) @@ -365,6 +376,7 @@ def test_hist_non_monotonic(constructor_eager: ConstructorEager) -> None: st.floats(min_value=0.001, max_value=1_000, allow_nan=False), max_size=50 ), ) +@xfail_hist @pytest.mark.filterwarnings( "ignore:`Series.hist` is being called from the stable API although considered an unstable feature.", "ignore:invalid value encountered in cast:RuntimeWarning", @@ -421,6 +433,7 @@ def test_hist_bin_hypotheis( ), bin_count=st.integers(min_value=0, max_value=1_000), ) +@xfail_hist @pytest.mark.skipif( POLARS_VERSION < (1, 15), reason="hist(bin_count=...) behavior significantly changed after this version",