From fb99b52d1a74ea5c3a78aa642f8d3e85f2162df4 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 31 Jan 2026 15:59:27 +0100 Subject: [PATCH 1/7] feat: Disallow casting temporal to numeric --- narwhals/_arrow/series.py | 8 +++-- narwhals/_dask/expr.py | 22 +++++++++--- narwhals/_duckdb/expr.py | 15 ++++++-- narwhals/_ibis/expr.py | 20 ++++++++--- narwhals/_pandas_like/series.py | 9 +++-- narwhals/_polars/series.py | 9 +++-- narwhals/_spark_like/expr.py | 15 ++++++-- narwhals/dtypes.py | 23 ++++++++++++ narwhals/expr.py | 9 +++++ narwhals/series.py | 9 +++++ tests/expr_and_series/cast_test.py | 57 ++++++++++++++++++++++++++++++ 11 files changed, 174 insertions(+), 22 deletions(-) diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 45a6d6b3d2..590b25a6b7 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -28,12 +28,14 @@ from narwhals._typing_compat import assert_never from narwhals._utils import ( Implementation, + Version, generate_temporary_column_name, is_list_of, no_default, not_implemented, ) from narwhals.dependencies import is_numpy_array_1d +from narwhals.dtypes import _validate_cast_temporal_to_numeric from narwhals.exceptions import InvalidOperationError, ShapeError if TYPE_CHECKING: @@ -61,7 +63,7 @@ ) from narwhals._compliant.series import HistData from narwhals._typing import NoDefault - from narwhals._utils import Version, _LimitedContext + from narwhals._utils import _LimitedContext from narwhals.dtypes import DType from narwhals.typing import ( ClosedInterval, @@ -569,7 +571,9 @@ def is_nan(self) -> Self: return self._with_native(pc.is_nan(self.native), preserve_broadcast=True) def cast(self, dtype: IntoDType) -> Self: - data_type = narwhals_to_native_dtype(dtype, self._version) + if (version := self._version) != Version.V1: + _validate_cast_temporal_to_numeric(source=self.dtype, target=dtype) + data_type = narwhals_to_native_dtype(dtype, version) return self._with_native(pc.cast(self.native, data_type), preserve_broadcast=True) def null_count(self, *, _return_py_scalar: bool = True) -> int: diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index 9b70e59a3e..a25818b717 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -19,10 +19,12 @@ from narwhals._pandas_like.utils import get_dtype_backend, native_to_narwhals_dtype from narwhals._utils import ( Implementation, + Version, generate_temporary_column_name, no_default, not_implemented, ) +from narwhals.dtypes import _validate_cast_temporal_to_numeric from narwhals.exceptions import InvalidOperationError if TYPE_CHECKING: @@ -40,7 +42,7 @@ from narwhals._dask.dataframe import DaskLazyFrame from narwhals._dask.namespace import DaskNamespace from narwhals._typing import NoDefault - from narwhals._utils import Version, _LimitedContext + from narwhals._utils import _LimitedContext from narwhals.typing import ( FillNullStrategy, IntoDType, @@ -613,11 +615,21 @@ def func(df: DaskLazyFrame) -> Sequence[dx.Series]: ) def cast(self, dtype: IntoDType) -> Self: - def func(expr: dx.Series) -> dx.Series: - native_dtype = narwhals_to_native_dtype(dtype, self._version) - return expr.astype(native_dtype) + def func(df: DaskLazyFrame) -> list[dx.Series]: + if (version := self._version) != Version.V1: + schema = df.schema + for name in self._evaluate_output_names(df): + _validate_cast_temporal_to_numeric(source=schema[name], target=dtype) - return self._with_callable(func) + native_dtype = narwhals_to_native_dtype(dtype, version) + return [expr.astype(native_dtype) for expr in self._call(df)] + + return self.__class__( + func, + evaluate_output_names=self._evaluate_output_names, + alias_output_names=self._alias_output_names, + version=self._version, + ) def is_finite(self) -> Self: import dask.array as da diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py index 7a0977a54c..1226a16646 100644 --- a/narwhals/_duckdb/expr.py +++ b/narwhals/_duckdb/expr.py @@ -22,6 +22,7 @@ ) from narwhals._sql.expr import SQLExpr from narwhals._utils import Implementation, Version, extend_bool, no_default +from narwhals.dtypes import _validate_cast_temporal_to_numeric if TYPE_CHECKING: from collections.abc import Sequence @@ -267,13 +268,23 @@ def _fill_constant(expr: Expression, value: Any) -> Expression: def cast(self, dtype: IntoDType) -> Self: def func(df: DuckDBLazyFrame) -> list[Expression]: + if (version := self._version) != Version.V1: + schema = df.collect_schema() + for name in self._evaluate_output_names(df): + _validate_cast_temporal_to_numeric(source=schema[name], target=dtype) + tz = DeferredTimeZone(df.native) - native_dtype = narwhals_to_native_dtype(dtype, self._version, tz) + native_dtype = narwhals_to_native_dtype(dtype, version, tz) return [expr.cast(native_dtype) for expr in self(df)] def window_f(df: DuckDBLazyFrame, inputs: DuckDBWindowInputs) -> list[Expression]: + if (version := self._version) != Version.V1: + schema = df.collect_schema() + for name in self._evaluate_output_names(df): + _validate_cast_temporal_to_numeric(source=schema[name], target=dtype) + tz = DeferredTimeZone(df.native) - native_dtype = narwhals_to_native_dtype(dtype, self._version, tz) + native_dtype = narwhals_to_native_dtype(dtype, version, tz) return [expr.cast(native_dtype) for expr in self.window_function(df, inputs)] return self.__class__( diff --git a/narwhals/_ibis/expr.py b/narwhals/_ibis/expr.py index 9de16232bb..7e45f177e3 100644 --- a/narwhals/_ibis/expr.py +++ b/narwhals/_ibis/expr.py @@ -28,6 +28,7 @@ not_implemented, zip_strict, ) +from narwhals.dtypes import _validate_cast_temporal_to_numeric if TYPE_CHECKING: from collections.abc import Iterator, Sequence @@ -269,12 +270,21 @@ def _fill_null(expr: ir.Value, value: ir.Scalar) -> ir.Value: return self._with_callable(_fill_null, value=value) def cast(self, dtype: IntoDType) -> Self: - def _func(expr: ir.Column) -> ir.Value: - native_dtype = narwhals_to_native_dtype(dtype, self._version) - # ibis `cast` overloads do not include DataType, only literals - return expr.cast(native_dtype) # type: ignore[unused-ignore] + def func(df: IbisLazyFrame) -> list[ir.Value]: + if (version := self._version) != Version.V1: + schema = df.collect_schema() + for name in self._evaluate_output_names(df): + _validate_cast_temporal_to_numeric(source=schema[name], target=dtype) + + native_dtype = narwhals_to_native_dtype(dtype, version) + return [expr.cast(native_dtype) for expr in self(df)] # type: ignore[misc] - return self._with_callable(_func) + return self.__class__( + func, + evaluate_output_names=self._evaluate_output_names, + alias_output_names=self._alias_output_names, + version=self._version, + ) def is_unique(self) -> Self: return self._with_callable( diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 187a5c6c42..8dd7003c36 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -25,8 +25,9 @@ set_index, ) from narwhals._typing_compat import assert_never -from narwhals._utils import Implementation, is_list_of, no_default, parse_version +from narwhals._utils import Implementation, Version, is_list_of, no_default, parse_version from narwhals.dependencies import is_numpy_array_1d, is_pandas_like_series +from narwhals.dtypes import _validate_cast_temporal_to_numeric from narwhals.exceptions import InvalidOperationError if TYPE_CHECKING: @@ -44,7 +45,7 @@ from narwhals._pandas_like.namespace import PandasLikeNamespace from narwhals._pandas_like.typing import NativeSeriesT from narwhals._typing import NoDefault - from narwhals._utils import Version, _LimitedContext + from narwhals._utils import _LimitedContext from narwhals.dtypes import DType from narwhals.typing import ( ClosedInterval, @@ -308,6 +309,8 @@ def _scatter_in_place(self, indices: Self, values: Self) -> None: self.native.iloc[indices.native] = values_native def cast(self, dtype: IntoDType) -> Self: + if (version := self._version) != Version.V1: + _validate_cast_temporal_to_numeric(source=self.dtype, target=dtype) if self.dtype == dtype and self.native.dtype != "object": # Avoid dealing with pandas' type-system if we can. Note that it's only # safe to do this if we're not starting with object dtype, see tests/expr_and_series/cast_test.py::test_cast_object_pandas @@ -317,7 +320,7 @@ def cast(self, dtype: IntoDType) -> Self: dtype, dtype_backend=get_dtype_backend(self.native.dtype, self._implementation), implementation=self._implementation, - version=self._version, + version=version, ) return self._with_native(self.native.astype(pd_dtype), preserve_broadcast=True) diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 2ff271fc66..dfbacead3a 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -20,8 +20,9 @@ narwhals_to_native_dtype, native_to_narwhals_dtype, ) -from narwhals._utils import Implementation, no_default, requires +from narwhals._utils import Implementation, Version, no_default, requires from narwhals.dependencies import is_numpy_array_1d, is_pandas_index +from narwhals.dtypes import _validate_cast_temporal_to_numeric if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Mapping, Sequence @@ -35,7 +36,7 @@ from narwhals._polars.dataframe import Method, PolarsDataFrame from narwhals._polars.namespace import PolarsNamespace from narwhals._typing import NoDefault - from narwhals._utils import Version, _LimitedContext + from narwhals._utils import _LimitedContext from narwhals.dtypes import DType from narwhals.series import Series from narwhals.typing import ( @@ -288,7 +289,9 @@ def __getitem__(self, item: MultiIndexSelector[Self]) -> Any | Self: return self._from_native_object(self.native.__getitem__(item)) def cast(self, dtype: IntoDType) -> Self: - dtype_pl = narwhals_to_native_dtype(dtype, self._version) + if (version := self._version) != Version.V1: + _validate_cast_temporal_to_numeric(source=self.dtype, target=dtype) + dtype_pl = narwhals_to_native_dtype(dtype, version) return self._with_native(self.native.cast(dtype_pl)) def clip(self, lower_bound: PolarsSeries, upper_bound: PolarsSeries) -> Self: diff --git a/narwhals/_spark_like/expr.py b/narwhals/_spark_like/expr.py index 404f31a818..46de0540d8 100644 --- a/narwhals/_spark_like/expr.py +++ b/narwhals/_spark_like/expr.py @@ -23,6 +23,7 @@ not_implemented, zip_strict, ) +from narwhals.dtypes import _validate_cast_temporal_to_numeric if TYPE_CHECKING: from collections.abc import Iterator, Mapping, Sequence @@ -247,16 +248,26 @@ def __invert__(self) -> Self: def cast(self, dtype: IntoDType) -> Self: def func(df: SparkLikeLazyFrame) -> Sequence[Column]: + if (version := self._version) != Version.V1: + schema = df.collect_schema() + for name in self._evaluate_output_names(df): + _validate_cast_temporal_to_numeric(source=schema[name], target=dtype) + spark_dtype = narwhals_to_native_dtype( - dtype, self._version, self._native_dtypes, df.native.sparkSession + dtype, version, self._native_dtypes, df.native.sparkSession ) return [expr.cast(spark_dtype) for expr in self(df)] def window_f( df: SparkLikeLazyFrame, inputs: SparkWindowInputs ) -> Sequence[Column]: + if (version := self._version) != Version.V1: + schema = df.collect_schema() + for name in self._evaluate_output_names(df): + _validate_cast_temporal_to_numeric(source=schema[name], target=dtype) + spark_dtype = narwhals_to_native_dtype( - dtype, self._version, self._native_dtypes, df.native.sparkSession + dtype, version, self._native_dtypes, df.native.sparkSession ) return [expr.cast(spark_dtype) for expr in self.window_function(df, inputs)] diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index 587b6c2758..e74f8803d9 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -59,6 +59,29 @@ def _validate_into_dtype(dtype: Any) -> None: raise TypeError(msg) +def _validate_cast_temporal_to_numeric( + source: DType | type[DType], target: IntoDType +) -> None: + """Validate that we're not casting from temporal to numeric types. + + Arguments: + source: The source data type. + target: The target data type to cast to. + + Raises: + InvalidOperationError: If attempting to cast from temporal to integer. + """ + if source.is_temporal() and target.is_numeric(): + msg = ( + "Casting from temporal type to numeric is not supported.\n\n" + "Hint: Use `.dt` accessor methods instead, such as:\n" + " - `.dt.timestamp()` for Unix timestamp.\n" + " - `.dt.year()`, `.dt.month()`, `.dt.day()`, ..., for date components.\n" + " - `.dt.total_seconds()`, `.dt.total_milliseconds(), ..., for duration total time." + ) + raise InvalidOperationError(msg) + + class DTypeClass(type): """Metaclass for DType classes. diff --git a/narwhals/expr.py b/narwhals/expr.py index 4f402ba93b..176e0b48f1 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -171,6 +171,15 @@ def cast(self, dtype: IntoDType) -> Self: Arguments: dtype: Data type that the object will be cast into. + Note: + Unlike polars, we don't allow to cast from a temporal to a numeric data type. + + Use `.dt` accessor methods instead, such as: + + * `.dt.timestamp()` for Unix timestamp. + * `.dt.year()`, `.dt.month()`, `.dt.day()`, ..., for date components. + * `.dt.total_seconds()`, `.dt.total_milliseconds(), ..., for duration total time. + Examples: >>> import pandas as pd >>> import narwhals as nw diff --git a/narwhals/series.py b/narwhals/series.py index c3290c6873..9a82f489d9 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -608,6 +608,15 @@ def cast(self, dtype: IntoDType) -> Self: Arguments: dtype: Data type that the object will be cast into. + Note: + Unlike polars, we don't allow to cast from a temporal to a numeric data type. + + Use `.dt` accessor methods instead, such as: + + * `.dt.timestamp()` for Unix timestamp. + * `.dt.year()`, `.dt.month()`, `.dt.day()`, ..., for date components. + * `.dt.total_seconds()`, `.dt.total_milliseconds(), ..., for duration total time. + Examples: >>> import pyarrow as pa >>> import narwhals as nw diff --git a/tests/expr_and_series/cast_test.py b/tests/expr_and_series/cast_test.py index cfadaff347..32acb32381 100644 --- a/tests/expr_and_series/cast_test.py +++ b/tests/expr_and_series/cast_test.py @@ -6,6 +6,7 @@ import pytest import narwhals as nw +from narwhals.exceptions import InvalidOperationError from tests.utils import ( PANDAS_VERSION, PYARROW_VERSION, @@ -442,3 +443,59 @@ def test_cast_object_pandas() -> None: s = nw.from_native(pd.DataFrame({"a": [2, 3, None]}, dtype=object))["a"] assert s[0] == 2 assert s.cast(nw.String)[0] == "2" + + +NUMERIC_DTYPES = [ + nw.Int8, + nw.Int16, + nw.Int32, + nw.Int64, + nw.Float32, + nw.Float64, + nw.UInt32, + nw.UInt64, +] + + +@pytest.mark.parametrize( + "values", [[datetime(2000, 1, 1, 12, 0), None], [timedelta(365, 59), None]] +) +@pytest.mark.parametrize(("target_dtype"), NUMERIC_DTYPES) +def test_cast_temporal_to_numeric_raises_expr( + constructor: Constructor, + request: pytest.FixtureRequest, + values: list[datetime] | list[timedelta], + target_dtype: nw.dtypes.DType, +) -> None: + if "polars" in str(constructor): + reason = "Polars expressions wrap native expressions" + request.applymarker(pytest.mark.xfail(reason=reason)) + + if isinstance(values[0], timedelta) and "spark" in str(constructor): + reason = "interval not implemented" + request.applymarker(pytest.mark.xfail(reason=reason)) + + df = nw.from_native(constructor({"a": values})).lazy() + msg = "Casting from temporal type to numeric" + with pytest.raises(InvalidOperationError, match=msg): + df.select(nw.col("a").cast(target_dtype)).collect() + + +@pytest.mark.parametrize( + "values", + [ + [datetime(2000, 1, 1, 12, 0), datetime(2000, 1, 2, 12, 0), None], + [timedelta(2, 59), timedelta(1, 59), None], + ], +) +@pytest.mark.parametrize(("target_dtype"), NUMERIC_DTYPES) +def test_cast_temporal_to_numeric_raises_series( + constructor_eager: ConstructorEager, + values: list[datetime] | list[timedelta], + target_dtype: nw.dtypes.DType, +) -> None: + df = nw.from_native(constructor_eager({"a": values}), eager_only=True) + series = df["a"] + msg = "Casting from temporal type to numeric" + with pytest.raises(InvalidOperationError, match=msg): + series.cast(target_dtype) From a7941bde04bb4506cb8164cc8b75dc5403611bc2 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 31 Jan 2026 18:40:58 +0100 Subject: [PATCH 2/7] simplify, collect_schema only in dtype.is_numeric() case, add suppress(Exception) --- narwhals/_dask/expr.py | 12 ++++++++---- narwhals/_duckdb/expr.py | 35 +++++++++++++++++++++------------- narwhals/_ibis/expr.py | 14 +++++++++----- narwhals/_spark_like/expr.py | 35 +++++++++++++++++----------------- tests/v1_test.py | 37 ++++++++++++++++++++++++++++++++++++ 5 files changed, 94 insertions(+), 39 deletions(-) diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index a25818b717..fa9a26e81d 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -1,6 +1,7 @@ from __future__ import annotations import warnings +from contextlib import suppress from typing import TYPE_CHECKING, Any, Callable, cast import pandas as pd @@ -616,10 +617,13 @@ def func(df: DaskLazyFrame) -> Sequence[dx.Series]: def cast(self, dtype: IntoDType) -> Self: def func(df: DaskLazyFrame) -> list[dx.Series]: - if (version := self._version) != Version.V1: - schema = df.schema - for name in self._evaluate_output_names(df): - _validate_cast_temporal_to_numeric(source=schema[name], target=dtype) + if dtype.is_numeric() and (version := self._version) != Version.V1: + with suppress(Exception): + schema = df.schema + for name in self._evaluate_output_names(df): + _validate_cast_temporal_to_numeric( + source=schema[name], target=dtype + ) native_dtype = narwhals_to_native_dtype(dtype, version) return [expr.astype(native_dtype) for expr in self._call(df)] diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py index 1226a16646..92fe07585b 100644 --- a/narwhals/_duckdb/expr.py +++ b/narwhals/_duckdb/expr.py @@ -1,6 +1,7 @@ from __future__ import annotations import operator +from contextlib import suppress from typing import TYPE_CHECKING, Any, Callable, cast from duckdb import CoalesceOperator, StarExpression @@ -43,6 +44,12 @@ from narwhals._utils import _LimitedContext from narwhals.typing import FillNullStrategy, IntoDType, RollingInterpolationMethod + try: + import duckdb.sqltypes as duckdb_dtypes + except ModuleNotFoundError: + # DuckDB pre 1.3 + import duckdb.typing as duckdb_dtypes + DuckDBWindowFunction = WindowFunction[DuckDBLazyFrame, Expression] DuckDBWindowInputs = WindowInputs[Expression] @@ -267,24 +274,26 @@ def _fill_constant(expr: Expression, value: Any) -> Expression: return self._with_elementwise(_fill_constant, value=value) def cast(self, dtype: IntoDType) -> Self: - def func(df: DuckDBLazyFrame) -> list[Expression]: - if (version := self._version) != Version.V1: - schema = df.collect_schema() - for name in self._evaluate_output_names(df): - _validate_cast_temporal_to_numeric(source=schema[name], target=dtype) + def _validated_dtype( + dtype: IntoDType, df: DuckDBLazyFrame + ) -> duckdb_dtypes.DuckDBPyType: + if dtype.is_numeric() and (version := self._version) != Version.V1: + with suppress(Exception): + schema = df.collect_schema() + for name in self._evaluate_output_names(df): + _validate_cast_temporal_to_numeric( + source=schema[name], target=dtype + ) tz = DeferredTimeZone(df.native) - native_dtype = narwhals_to_native_dtype(dtype, version, tz) + return narwhals_to_native_dtype(dtype, version, tz) + + def func(df: DuckDBLazyFrame) -> list[Expression]: + native_dtype = _validated_dtype(dtype, df) return [expr.cast(native_dtype) for expr in self(df)] def window_f(df: DuckDBLazyFrame, inputs: DuckDBWindowInputs) -> list[Expression]: - if (version := self._version) != Version.V1: - schema = df.collect_schema() - for name in self._evaluate_output_names(df): - _validate_cast_temporal_to_numeric(source=schema[name], target=dtype) - - tz = DeferredTimeZone(df.native) - native_dtype = narwhals_to_native_dtype(dtype, version, tz) + native_dtype = _validated_dtype(dtype, df) return [expr.cast(native_dtype) for expr in self.window_function(df, inputs)] return self.__class__( diff --git a/narwhals/_ibis/expr.py b/narwhals/_ibis/expr.py index 7e45f177e3..a6ab5bfc30 100644 --- a/narwhals/_ibis/expr.py +++ b/narwhals/_ibis/expr.py @@ -1,6 +1,7 @@ from __future__ import annotations import operator +from contextlib import suppress from typing import TYPE_CHECKING, Any, Callable, TypeVar, cast import ibis @@ -271,13 +272,16 @@ def _fill_null(expr: ir.Value, value: ir.Scalar) -> ir.Value: def cast(self, dtype: IntoDType) -> Self: def func(df: IbisLazyFrame) -> list[ir.Value]: - if (version := self._version) != Version.V1: - schema = df.collect_schema() - for name in self._evaluate_output_names(df): - _validate_cast_temporal_to_numeric(source=schema[name], target=dtype) + if dtype.is_numeric() and (version := self._version) != Version.V1: + with suppress(Exception): + schema = df.collect_schema() + for name in self._evaluate_output_names(df): + _validate_cast_temporal_to_numeric( + source=schema[name], target=dtype + ) native_dtype = narwhals_to_native_dtype(dtype, version) - return [expr.cast(native_dtype) for expr in self(df)] # type: ignore[misc] + return [expr.cast(native_dtype) for expr in self(df)] # pyright: ignore[reportArgumentType, reportCallIssue] return self.__class__( func, diff --git a/narwhals/_spark_like/expr.py b/narwhals/_spark_like/expr.py index 46de0540d8..a5fe94cc5a 100644 --- a/narwhals/_spark_like/expr.py +++ b/narwhals/_spark_like/expr.py @@ -1,6 +1,7 @@ from __future__ import annotations import operator +from contextlib import suppress from typing import TYPE_CHECKING, Any, Callable, ClassVar, Literal, cast from narwhals._spark_like.expr_dt import SparkLikeExprDateTimeNamespace @@ -41,6 +42,7 @@ ) from narwhals._spark_like.dataframe import SparkLikeLazyFrame from narwhals._spark_like.namespace import SparkLikeNamespace + from narwhals._spark_like.utils import _NativeDType from narwhals._typing import NoDefault from narwhals._utils import _LimitedContext from narwhals.typing import FillNullStrategy, IntoDType, RankMethod @@ -247,29 +249,28 @@ def __invert__(self) -> Self: return self._with_elementwise(invert) def cast(self, dtype: IntoDType) -> Self: - def func(df: SparkLikeLazyFrame) -> Sequence[Column]: - if (version := self._version) != Version.V1: - schema = df.collect_schema() - for name in self._evaluate_output_names(df): - _validate_cast_temporal_to_numeric(source=schema[name], target=dtype) - - spark_dtype = narwhals_to_native_dtype( + def _validated_dtype(dtype: IntoDType, df: SparkLikeLazyFrame) -> _NativeDType: + if dtype.is_numeric() and (version := self._version) != Version.V1: + with suppress(Exception): + schema = df.collect_schema() + for name in self._evaluate_output_names(df): + _validate_cast_temporal_to_numeric( + source=schema[name], target=dtype + ) + + return narwhals_to_native_dtype( dtype, version, self._native_dtypes, df.native.sparkSession ) - return [expr.cast(spark_dtype) for expr in self(df)] + + def func(df: SparkLikeLazyFrame) -> Sequence[Column]: + native_dtype = _validated_dtype(dtype, df) + return [expr.cast(native_dtype) for expr in self(df)] def window_f( df: SparkLikeLazyFrame, inputs: SparkWindowInputs ) -> Sequence[Column]: - if (version := self._version) != Version.V1: - schema = df.collect_schema() - for name in self._evaluate_output_names(df): - _validate_cast_temporal_to_numeric(source=schema[name], target=dtype) - - spark_dtype = narwhals_to_native_dtype( - dtype, version, self._native_dtypes, df.native.sparkSession - ) - return [expr.cast(spark_dtype) for expr in self.window_function(df, inputs)] + native_dtype = _validated_dtype(dtype, df) + return [expr.cast(native_dtype) for expr in self.window_function(df, inputs)] return self.__class__( func, diff --git a/tests/v1_test.py b/tests/v1_test.py index 54d501392b..80dec3315a 100644 --- a/tests/v1_test.py +++ b/tests/v1_test.py @@ -1195,3 +1195,40 @@ def test_any_value_series(constructor_eager: ConstructorEager) -> None: with pytest.warns(NarwhalsUnstableWarning): df["a"].any_value() + + +# !NOTE: Int64 seems to be the only type for which every backend can convert +NUMERIC_DTYPES = [nw_v1.Int64] + + +@pytest.mark.parametrize( + "values", [[datetime(2000, 1, 1, 12, 0), None], [timedelta(365, 59), None]] +) +@pytest.mark.parametrize(("target_dtype"), NUMERIC_DTYPES) +def test_cast_temporal_to_numeric_raises_expr( + constructor: Constructor, + values: list[datetime] | list[timedelta], + target_dtype: nw_v1.dtypes.DType, +) -> None: + df = nw_v1.from_native(constructor({"a": values})).lazy() + schema = df.select(nw_v1.col("a").cast(target_dtype)).collect_schema() + assert schema["a"] == target_dtype + + +@pytest.mark.parametrize( + "values", + [ + [datetime(2000, 1, 1, 12, 0), datetime(2000, 1, 2, 12, 0), None], + [timedelta(2, 59), timedelta(1, 59), None], + ], +) +@pytest.mark.parametrize(("target_dtype"), NUMERIC_DTYPES) +def test_cast_temporal_to_numeric_raises_series( + constructor_eager: ConstructorEager, + values: list[datetime] | list[timedelta], + target_dtype: nw_v1.dtypes.DType, +) -> None: + df = nw_v1.from_native(constructor_eager({"a": values}), eager_only=True) + series = df["a"] + dtype = series.cast(target_dtype).dtype + assert dtype == target_dtype From 8fbbbda32c9b2d8bbfb32ca4b5f19053f43a767e Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 31 Jan 2026 18:53:43 +0100 Subject: [PATCH 3/7] facepalm --- narwhals/_dask/expr.py | 12 ++++-------- narwhals/_duckdb/expr.py | 12 ++++-------- narwhals/_ibis/expr.py | 12 ++++-------- narwhals/_spark_like/expr.py | 7 +++++-- 4 files changed, 17 insertions(+), 26 deletions(-) diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index fa9a26e81d..adb167c3ab 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -1,7 +1,6 @@ from __future__ import annotations import warnings -from contextlib import suppress from typing import TYPE_CHECKING, Any, Callable, cast import pandas as pd @@ -617,13 +616,10 @@ def func(df: DaskLazyFrame) -> Sequence[dx.Series]: def cast(self, dtype: IntoDType) -> Self: def func(df: DaskLazyFrame) -> list[dx.Series]: - if dtype.is_numeric() and (version := self._version) != Version.V1: - with suppress(Exception): - schema = df.schema - for name in self._evaluate_output_names(df): - _validate_cast_temporal_to_numeric( - source=schema[name], target=dtype - ) + if (version := self._version) != Version.V1 and dtype.is_numeric(): + schema = df.schema + for name in self._evaluate_output_names(df): + _validate_cast_temporal_to_numeric(source=schema[name], target=dtype) native_dtype = narwhals_to_native_dtype(dtype, version) return [expr.astype(native_dtype) for expr in self._call(df)] diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py index 92fe07585b..d7c047b0b4 100644 --- a/narwhals/_duckdb/expr.py +++ b/narwhals/_duckdb/expr.py @@ -1,7 +1,6 @@ from __future__ import annotations import operator -from contextlib import suppress from typing import TYPE_CHECKING, Any, Callable, cast from duckdb import CoalesceOperator, StarExpression @@ -277,13 +276,10 @@ def cast(self, dtype: IntoDType) -> Self: def _validated_dtype( dtype: IntoDType, df: DuckDBLazyFrame ) -> duckdb_dtypes.DuckDBPyType: - if dtype.is_numeric() and (version := self._version) != Version.V1: - with suppress(Exception): - schema = df.collect_schema() - for name in self._evaluate_output_names(df): - _validate_cast_temporal_to_numeric( - source=schema[name], target=dtype - ) + if (version := self._version) != Version.V1 and dtype.is_numeric(): + schema = df.collect_schema() + for name in self._evaluate_output_names(df): + _validate_cast_temporal_to_numeric(source=schema[name], target=dtype) tz = DeferredTimeZone(df.native) return narwhals_to_native_dtype(dtype, version, tz) diff --git a/narwhals/_ibis/expr.py b/narwhals/_ibis/expr.py index a6ab5bfc30..32849d8e19 100644 --- a/narwhals/_ibis/expr.py +++ b/narwhals/_ibis/expr.py @@ -1,7 +1,6 @@ from __future__ import annotations import operator -from contextlib import suppress from typing import TYPE_CHECKING, Any, Callable, TypeVar, cast import ibis @@ -272,13 +271,10 @@ def _fill_null(expr: ir.Value, value: ir.Scalar) -> ir.Value: def cast(self, dtype: IntoDType) -> Self: def func(df: IbisLazyFrame) -> list[ir.Value]: - if dtype.is_numeric() and (version := self._version) != Version.V1: - with suppress(Exception): - schema = df.collect_schema() - for name in self._evaluate_output_names(df): - _validate_cast_temporal_to_numeric( - source=schema[name], target=dtype - ) + if (version := self._version) != Version.V1 and dtype.is_numeric(): + schema = df.collect_schema() + for name in self._evaluate_output_names(df): + _validate_cast_temporal_to_numeric(source=schema[name], target=dtype) native_dtype = narwhals_to_native_dtype(dtype, version) return [expr.cast(native_dtype) for expr in self(df)] # pyright: ignore[reportArgumentType, reportCallIssue] diff --git a/narwhals/_spark_like/expr.py b/narwhals/_spark_like/expr.py index a5fe94cc5a..e47a9399d5 100644 --- a/narwhals/_spark_like/expr.py +++ b/narwhals/_spark_like/expr.py @@ -24,7 +24,7 @@ not_implemented, zip_strict, ) -from narwhals.dtypes import _validate_cast_temporal_to_numeric +from narwhals.dtypes import DType, _validate_cast_temporal_to_numeric if TYPE_CHECKING: from collections.abc import Iterator, Mapping, Sequence @@ -250,9 +250,12 @@ def __invert__(self) -> Self: def cast(self, dtype: IntoDType) -> Self: def _validated_dtype(dtype: IntoDType, df: SparkLikeLazyFrame) -> _NativeDType: - if dtype.is_numeric() and (version := self._version) != Version.V1: + if (version := self._version) != Version.V1 and dtype.is_numeric(): + schema: dict[str, DType] = {} with suppress(Exception): schema = df.collect_schema() + + if schema: for name in self._evaluate_output_names(df): _validate_cast_temporal_to_numeric( source=schema[name], target=dtype From 5b0b398b1cce1dfcfccda540817399d3fb1673bc Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 31 Jan 2026 19:05:11 +0100 Subject: [PATCH 4/7] skip old pandas --- tests/v1_test.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/v1_test.py b/tests/v1_test.py index 80dec3315a..bb799056b4 100644 --- a/tests/v1_test.py +++ b/tests/v1_test.py @@ -1205,11 +1205,13 @@ def test_any_value_series(constructor_eager: ConstructorEager) -> None: "values", [[datetime(2000, 1, 1, 12, 0), None], [timedelta(365, 59), None]] ) @pytest.mark.parametrize(("target_dtype"), NUMERIC_DTYPES) -def test_cast_temporal_to_numeric_raises_expr( +def test_cast_temporal_to_numeric_expr( constructor: Constructor, values: list[datetime] | list[timedelta], target_dtype: nw_v1.dtypes.DType, ) -> None: + if "pandas" in str(constructor) and PANDAS_VERSION < (2, 0, 0): + pytest.skip(reason="ValueError: Cannot convert NaT values to integer") df = nw_v1.from_native(constructor({"a": values})).lazy() schema = df.select(nw_v1.col("a").cast(target_dtype)).collect_schema() assert schema["a"] == target_dtype @@ -1223,11 +1225,14 @@ def test_cast_temporal_to_numeric_raises_expr( ], ) @pytest.mark.parametrize(("target_dtype"), NUMERIC_DTYPES) -def test_cast_temporal_to_numeric_raises_series( +def test_cast_temporal_to_numeric_series( constructor_eager: ConstructorEager, values: list[datetime] | list[timedelta], target_dtype: nw_v1.dtypes.DType, ) -> None: + if "pandas" in str(constructor_eager) and PANDAS_VERSION < (2, 0, 0): + pytest.skip(reason="ValueError: Cannot convert NaT values to integer") + df = nw_v1.from_native(constructor_eager({"a": values}), eager_only=True) series = df["a"] dtype = series.cast(target_dtype).dtype From b13135fbb202791874d1e1b0763683bc4db4e041 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 7 Feb 2026 13:57:22 +0100 Subject: [PATCH 5/7] remove V1 stability --- narwhals/_arrow/series.py | 5 ++-- narwhals/_dask/expr.py | 4 ++-- narwhals/_duckdb/expr.py | 4 ++-- narwhals/_ibis/expr.py | 4 ++-- narwhals/_pandas_like/series.py | 5 ++-- narwhals/_polars/series.py | 5 ++-- narwhals/_spark_like/expr.py | 4 ++-- tests/v1_test.py | 42 --------------------------------- 8 files changed, 14 insertions(+), 59 deletions(-) diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 590b25a6b7..809ade884f 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -571,9 +571,8 @@ def is_nan(self) -> Self: return self._with_native(pc.is_nan(self.native), preserve_broadcast=True) def cast(self, dtype: IntoDType) -> Self: - if (version := self._version) != Version.V1: - _validate_cast_temporal_to_numeric(source=self.dtype, target=dtype) - data_type = narwhals_to_native_dtype(dtype, version) + _validate_cast_temporal_to_numeric(source=self.dtype, target=dtype) + data_type = narwhals_to_native_dtype(dtype, self._version) return self._with_native(pc.cast(self.native, data_type), preserve_broadcast=True) def null_count(self, *, _return_py_scalar: bool = True) -> int: diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index 9b38f82165..09f90fd71b 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -616,12 +616,12 @@ def func(df: DaskLazyFrame) -> Sequence[dx.Series]: def cast(self, dtype: IntoDType) -> Self: def func(df: DaskLazyFrame) -> list[dx.Series]: - if (version := self._version) != Version.V1 and dtype.is_numeric(): + if dtype.is_numeric(): schema = df.schema for name in self._evaluate_output_names(df): _validate_cast_temporal_to_numeric(source=schema[name], target=dtype) - native_dtype = narwhals_to_native_dtype(dtype, version) + native_dtype = narwhals_to_native_dtype(dtype, self._version) return [expr.astype(native_dtype) for expr in self._call(df)] return self.__class__( diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py index d7c047b0b4..f95736ab71 100644 --- a/narwhals/_duckdb/expr.py +++ b/narwhals/_duckdb/expr.py @@ -276,13 +276,13 @@ def cast(self, dtype: IntoDType) -> Self: def _validated_dtype( dtype: IntoDType, df: DuckDBLazyFrame ) -> duckdb_dtypes.DuckDBPyType: - if (version := self._version) != Version.V1 and dtype.is_numeric(): + if dtype.is_numeric(): schema = df.collect_schema() for name in self._evaluate_output_names(df): _validate_cast_temporal_to_numeric(source=schema[name], target=dtype) tz = DeferredTimeZone(df.native) - return narwhals_to_native_dtype(dtype, version, tz) + return narwhals_to_native_dtype(dtype, self._version, tz) def func(df: DuckDBLazyFrame) -> list[Expression]: native_dtype = _validated_dtype(dtype, df) diff --git a/narwhals/_ibis/expr.py b/narwhals/_ibis/expr.py index 32849d8e19..0abc40e372 100644 --- a/narwhals/_ibis/expr.py +++ b/narwhals/_ibis/expr.py @@ -271,12 +271,12 @@ def _fill_null(expr: ir.Value, value: ir.Scalar) -> ir.Value: def cast(self, dtype: IntoDType) -> Self: def func(df: IbisLazyFrame) -> list[ir.Value]: - if (version := self._version) != Version.V1 and dtype.is_numeric(): + if dtype.is_numeric(): schema = df.collect_schema() for name in self._evaluate_output_names(df): _validate_cast_temporal_to_numeric(source=schema[name], target=dtype) - native_dtype = narwhals_to_native_dtype(dtype, version) + native_dtype = narwhals_to_native_dtype(dtype, self._version) return [expr.cast(native_dtype) for expr in self(df)] # pyright: ignore[reportArgumentType, reportCallIssue] return self.__class__( diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index e2e30e2551..3408f88199 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -311,8 +311,7 @@ def _scatter_in_place(self, indices: Self, values: Self) -> None: self.native.iloc[indices.native] = values_native def cast(self, dtype: IntoDType) -> Self: - if (version := self._version) != Version.V1: - _validate_cast_temporal_to_numeric(source=self.dtype, target=dtype) + _validate_cast_temporal_to_numeric(source=self.dtype, target=dtype) if self.dtype == dtype and self.native.dtype != "object": # Avoid dealing with pandas' type-system if we can. Note that it's only # safe to do this if we're not starting with object dtype, see tests/expr_and_series/cast_test.py::test_cast_object_pandas @@ -322,7 +321,7 @@ def cast(self, dtype: IntoDType) -> Self: dtype, dtype_backend=get_dtype_backend(self.native.dtype, self._implementation), implementation=self._implementation, - version=version, + version=self._version, ) return self._with_native(self.native.astype(pd_dtype), preserve_broadcast=True) diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index dfbacead3a..181275492a 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -289,9 +289,8 @@ def __getitem__(self, item: MultiIndexSelector[Self]) -> Any | Self: return self._from_native_object(self.native.__getitem__(item)) def cast(self, dtype: IntoDType) -> Self: - if (version := self._version) != Version.V1: - _validate_cast_temporal_to_numeric(source=self.dtype, target=dtype) - dtype_pl = narwhals_to_native_dtype(dtype, version) + _validate_cast_temporal_to_numeric(source=self.dtype, target=dtype) + dtype_pl = narwhals_to_native_dtype(dtype, version=self._version) return self._with_native(self.native.cast(dtype_pl)) def clip(self, lower_bound: PolarsSeries, upper_bound: PolarsSeries) -> Self: diff --git a/narwhals/_spark_like/expr.py b/narwhals/_spark_like/expr.py index e47a9399d5..b1f65fec0e 100644 --- a/narwhals/_spark_like/expr.py +++ b/narwhals/_spark_like/expr.py @@ -250,7 +250,7 @@ def __invert__(self) -> Self: def cast(self, dtype: IntoDType) -> Self: def _validated_dtype(dtype: IntoDType, df: SparkLikeLazyFrame) -> _NativeDType: - if (version := self._version) != Version.V1 and dtype.is_numeric(): + if dtype.is_numeric(): schema: dict[str, DType] = {} with suppress(Exception): schema = df.collect_schema() @@ -262,7 +262,7 @@ def _validated_dtype(dtype: IntoDType, df: SparkLikeLazyFrame) -> _NativeDType: ) return narwhals_to_native_dtype( - dtype, version, self._native_dtypes, df.native.sparkSession + dtype, self._version, self._native_dtypes, df.native.sparkSession ) def func(df: SparkLikeLazyFrame) -> Sequence[Column]: diff --git a/tests/v1_test.py b/tests/v1_test.py index 2a3abb9cb2..330211162f 100644 --- a/tests/v1_test.py +++ b/tests/v1_test.py @@ -1196,45 +1196,3 @@ def test_any_value_series(constructor_eager: ConstructorEager) -> None: with pytest.warns(NarwhalsUnstableWarning): df["a"].any_value() - - -# !NOTE: Int64 seems to be the only type for which every backend can convert -NUMERIC_DTYPES = [nw_v1.Int64] - - -@pytest.mark.parametrize( - "values", [[datetime(2000, 1, 1, 12, 0), None], [timedelta(365, 59), None]] -) -@pytest.mark.parametrize(("target_dtype"), NUMERIC_DTYPES) -def test_cast_temporal_to_numeric_expr( - constructor: Constructor, - values: list[datetime] | list[timedelta], - target_dtype: nw_v1.dtypes.DType, -) -> None: - if "pandas" in str(constructor) and PANDAS_VERSION < (2, 0, 0): - pytest.skip(reason="ValueError: Cannot convert NaT values to integer") - df = nw_v1.from_native(constructor({"a": values})).lazy() - schema = df.select(nw_v1.col("a").cast(target_dtype)).collect_schema() - assert schema["a"] == target_dtype - - -@pytest.mark.parametrize( - "values", - [ - [datetime(2000, 1, 1, 12, 0), datetime(2000, 1, 2, 12, 0), None], - [timedelta(2, 59), timedelta(1, 59), None], - ], -) -@pytest.mark.parametrize(("target_dtype"), NUMERIC_DTYPES) -def test_cast_temporal_to_numeric_series( - constructor_eager: ConstructorEager, - values: list[datetime] | list[timedelta], - target_dtype: nw_v1.dtypes.DType, -) -> None: - if "pandas" in str(constructor_eager) and PANDAS_VERSION < (2, 0, 0): - pytest.skip(reason="ValueError: Cannot convert NaT values to integer") - - df = nw_v1.from_native(constructor_eager({"a": values}), eager_only=True) - series = df["a"] - dtype = series.cast(target_dtype).dtype - assert dtype == target_dtype From cbf6c33531298d3a5c61b1afda1ce5466ebb4ef6 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 7 Feb 2026 14:08:02 +0100 Subject: [PATCH 6/7] fix duckdb_dtypes import --- narwhals/_duckdb/expr.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py index f95736ab71..ad1cb7a8d6 100644 --- a/narwhals/_duckdb/expr.py +++ b/narwhals/_duckdb/expr.py @@ -39,16 +39,11 @@ ) from narwhals._duckdb.dataframe import DuckDBLazyFrame from narwhals._duckdb.namespace import DuckDBNamespace + from narwhals._duckdb.utils import duckdb_dtypes from narwhals._typing import NoDefault from narwhals._utils import _LimitedContext from narwhals.typing import FillNullStrategy, IntoDType, RollingInterpolationMethod - try: - import duckdb.sqltypes as duckdb_dtypes - except ModuleNotFoundError: - # DuckDB pre 1.3 - import duckdb.typing as duckdb_dtypes - DuckDBWindowFunction = WindowFunction[DuckDBLazyFrame, Expression] DuckDBWindowInputs = WindowInputs[Expression] From 4c633076ff8b806db99707e7af168fe8f6d00a43 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 8 Mar 2026 09:57:33 +0000 Subject: [PATCH 7/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- narwhals/_pandas_like/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 8118549c94..c67132297c 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -24,7 +24,7 @@ set_index, ) from narwhals._typing_compat import assert_never -from narwhals._utils import Implementation, Version, is_list_of, no_default, parse_version +from narwhals._utils import Implementation, Version, is_list_of, no_default from narwhals.dependencies import is_numpy_array_1d, is_pandas_like_series from narwhals.dtypes import _validate_cast_temporal_to_numeric from narwhals.exceptions import InvalidOperationError