From 7b1a0d69536018cea1630c5356835594b62a78f6 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 16 Feb 2025 17:41:02 +0000 Subject: [PATCH 1/9] fix(RFC): Use metaclass for safe `DType` attr access Mentioned in -https://github.com/narwhals-dev/narwhals/pull/1991#discussion_r1957328569 - https://github.com/narwhals-dev/narwhals/pull/1807#discussion_r1956331625 --- narwhals/_polars/utils.py | 8 ++++---- narwhals/dtypes.py | 18 ++++++++++++++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/narwhals/_polars/utils.py b/narwhals/_polars/utils.py index eaae9152b0..b78560d27a 100644 --- a/narwhals/_polars/utils.py +++ b/narwhals/_polars/utils.py @@ -14,6 +14,7 @@ from narwhals.exceptions import NarwhalsError from narwhals.exceptions import ShapeError from narwhals.utils import import_dtypes_module +from narwhals.utils import isinstance_or_issubclass if TYPE_CHECKING: from narwhals._polars.dataframe import PolarsDataFrame @@ -190,10 +191,9 @@ def narwhals_to_native_dtype( if dtype == dtypes.Decimal: msg = "Casting to Decimal is not supported yet." raise NotImplementedError(msg) - if dtype == dtypes.Datetime or isinstance(dtype, dtypes.Datetime): - dt_time_unit: TimeUnit = getattr(dtype, "time_unit", "us") - dt_time_zone = getattr(dtype, "time_zone", None) - return pl.Datetime(dt_time_unit, dt_time_zone) # type: ignore[arg-type] + if isinstance_or_issubclass(dtype, dtypes.Datetime): + # [s] is not allowed for polars + return pl.Datetime(dtype.time_unit, dtype.time_zone) # type: ignore[arg-type] if dtype == dtypes.Duration or isinstance(dtype, dtypes.Duration): du_time_unit: TimeUnit = getattr(dtype, "time_unit", "us") return pl.Duration(time_unit=du_time_unit) # type: ignore[arg-type] diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index 7d9cf31361..20b514d601 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -448,7 +448,17 @@ class Unknown(DType): """ -class Datetime(TemporalType): +class _DatetimeMeta(type): + @property + def time_unit(cls) -> TimeUnit: + return "us" + + @property + def time_zone(cls) -> str | None: + return None + + +class Datetime(TemporalType, metaclass=_DatetimeMeta): """Data type representing a calendar date and time of day. Arguments: @@ -504,12 +514,12 @@ def __init__( if isinstance(time_zone, timezone): time_zone = str(time_zone) - self.time_unit = time_unit - self.time_zone = time_zone + self.time_unit: TimeUnit = time_unit + self.time_zone: str | None = time_zone def __eq__(self: Self, other: object) -> bool: # allow comparing object instances to class - if type(other) is type and issubclass(other, self.__class__): + if type(other) is _DatetimeMeta and issubclass(other, self.__class__): return True elif isinstance(other, self.__class__): return self.time_unit == other.time_unit and self.time_zone == other.time_zone From 12ac8892337fa92d46bcca78d63fa89b400f105d Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 17 Feb 2025 17:04:41 +0000 Subject: [PATCH 2/9] chore: add `_DurationMeta` Both `Duration` and `Datetime` are working with `polars` now. From this point it should just be reducing code for all the other backends --- narwhals/_polars/utils.py | 5 ++--- narwhals/dtypes.py | 17 ++++++++++------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/narwhals/_polars/utils.py b/narwhals/_polars/utils.py index b78560d27a..5366b06c78 100644 --- a/narwhals/_polars/utils.py +++ b/narwhals/_polars/utils.py @@ -194,9 +194,8 @@ def narwhals_to_native_dtype( if isinstance_or_issubclass(dtype, dtypes.Datetime): # [s] is not allowed for polars return pl.Datetime(dtype.time_unit, dtype.time_zone) # type: ignore[arg-type] - if dtype == dtypes.Duration or isinstance(dtype, dtypes.Duration): - du_time_unit: TimeUnit = getattr(dtype, "time_unit", "us") - return pl.Duration(time_unit=du_time_unit) # type: ignore[arg-type] + if isinstance_or_issubclass(dtype, dtypes.Duration): + return pl.Duration(dtype.time_unit) # type: ignore[arg-type] if dtype == dtypes.List: return pl.List(narwhals_to_native_dtype(dtype.inner, version, backend_version)) # type: ignore[union-attr] if dtype == dtypes.Struct: diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index dbd1e1675c..ac2aaa0236 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -534,7 +534,13 @@ def __repr__(self: Self) -> str: # pragma: no cover return f"{class_name}(time_unit={self.time_unit!r}, time_zone={self.time_zone!r})" -class Duration(TemporalType): +class _DurationMeta(type): + @property + def time_unit(cls) -> TimeUnit: + return "us" + + +class Duration(TemporalType, metaclass=_DurationMeta): """Data type representing a time duration. Arguments: @@ -562,10 +568,7 @@ class Duration(TemporalType): Duration(time_unit='ms') """ - def __init__( - self: Self, - time_unit: TimeUnit = "us", - ) -> None: + def __init__(self: Self, time_unit: TimeUnit = "us") -> None: if time_unit not in ("s", "ms", "us", "ns"): msg = ( "invalid `time_unit`" @@ -573,11 +576,11 @@ def __init__( ) raise ValueError(msg) - self.time_unit = time_unit + self.time_unit: TimeUnit = time_unit def __eq__(self: Self, other: object) -> bool: # allow comparing object instances to class - if type(other) is type and issubclass(other, self.__class__): + if type(other) is _DurationMeta and issubclass(other, self.__class__): return True elif isinstance(other, self.__class__): return self.time_unit == other.time_unit From ed4c412f35522825c07108aa17352632c06d5fef Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 17 Feb 2025 17:16:01 +0000 Subject: [PATCH 3/9] refactor: upgrade `_pandas` --- narwhals/_pandas_like/utils.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 98f613e9fa..a16f5014a7 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -613,27 +613,26 @@ def narwhals_to_native_dtype( # noqa: PLR0915 # convert to it? return "category" if isinstance_or_issubclass(dtype, dtypes.Datetime): - dt_time_unit = getattr(dtype, "time_unit", "us") - dt_time_zone = getattr(dtype, "time_zone", None) - # Pandas does not support "ms" or "us" time units before version 2.0 - # Let's overwrite with "ns" if implementation is Implementation.PANDAS and backend_version < ( 2, ): # pragma: no cover dt_time_unit = "ns" + else: + dt_time_unit = dtype.time_unit if dtype_backend == "pyarrow": - tz_part = f", tz={dt_time_zone}" if dt_time_zone else "" + tz_part = f", tz={tz}" if (tz := dtype.time_zone) else "" return f"timestamp[{dt_time_unit}{tz_part}][pyarrow]" else: - tz_part = f", {dt_time_zone}" if dt_time_zone else "" + tz_part = f", {tz}" if (tz := dtype.time_zone) else "" return f"datetime64[{dt_time_unit}{tz_part}]" if isinstance_or_issubclass(dtype, dtypes.Duration): - du_time_unit = getattr(dtype, "time_unit", "us") + du_time_unit = dtype.time_unit if implementation is Implementation.PANDAS and backend_version < ( 2, ): # pragma: no cover + # NOTE: I think this assignment was a typo, it never gets used (should be du_) dt_time_unit = "ns" return ( f"duration[{du_time_unit}][pyarrow]" From 3e122c66c97542c881b074344bfd228c8e4bec40 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 17 Feb 2025 17:21:14 +0000 Subject: [PATCH 4/9] refactor: upgrade `_arrow` --- narwhals/_arrow/utils.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index fad917a206..ef32c2e068 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -29,7 +29,6 @@ from narwhals._arrow.typing import Incomplete from narwhals._arrow.typing import StringArray from narwhals.dtypes import DType - from narwhals.typing import TimeUnit from narwhals.typing import _AnyDArray from narwhals.utils import Version @@ -182,12 +181,9 @@ def narwhals_to_native_dtype(dtype: DType | type[DType], version: Version) -> pa if isinstance_or_issubclass(dtype, dtypes.Categorical): return pa.dictionary(pa.uint32(), pa.string()) if isinstance_or_issubclass(dtype, dtypes.Datetime): - time_unit: TimeUnit = getattr(dtype, "time_unit", "us") - time_zone = getattr(dtype, "time_zone", None) - return pa.timestamp(time_unit, tz=time_zone) + return pa.timestamp(dtype.time_unit, tz=dtype.time_zone) # type: ignore[arg-type] if isinstance_or_issubclass(dtype, dtypes.Duration): - time_unit = getattr(dtype, "time_unit", "us") - return pa.duration(time_unit) + return pa.duration(dtype.time_unit) if isinstance_or_issubclass(dtype, dtypes.Date): return pa.date32() if isinstance_or_issubclass(dtype, dtypes.List): From 1db895e124e986c90eecff1f1b97a74c75482c0e Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 17 Feb 2025 17:26:13 +0000 Subject: [PATCH 5/9] refactor: "upgrade" `_duckdb` They're all noops, but good to keep consistent --- narwhals/_duckdb/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/narwhals/_duckdb/utils.py b/narwhals/_duckdb/utils.py index 324769ca88..1216e341eb 100644 --- a/narwhals/_duckdb/utils.py +++ b/narwhals/_duckdb/utils.py @@ -176,12 +176,12 @@ def narwhals_to_native_dtype(dtype: DType | type[DType], version: Version) -> st msg = "Categorical not supported by DuckDB" raise NotImplementedError(msg) if isinstance_or_issubclass(dtype, dtypes.Datetime): - _time_unit = getattr(dtype, "time_unit", "us") - _time_zone = getattr(dtype, "time_zone", None) + _time_unit = dtype.time_unit + _time_zone = dtype.time_zone msg = "todo" raise NotImplementedError(msg) if isinstance_or_issubclass(dtype, dtypes.Duration): # pragma: no cover - _time_unit = getattr(dtype, "time_unit", "us") + _time_unit = dtype.time_unit msg = "todo" raise NotImplementedError(msg) if isinstance_or_issubclass(dtype, dtypes.Date): # pragma: no cover From 82195dda75e8aca40d4704d983c48016d0b3ced2 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 17 Feb 2025 17:29:54 +0000 Subject: [PATCH 6/9] refactor: upgrade `_spark_like` --- narwhals/_spark_like/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_spark_like/utils.py b/narwhals/_spark_like/utils.py index 1176164d67..46ee0fd877 100644 --- a/narwhals/_spark_like/utils.py +++ b/narwhals/_spark_like/utils.py @@ -116,7 +116,7 @@ def narwhals_to_native_dtype( if isinstance_or_issubclass(dtype, dtypes.Date): return spark_types.DateType() if isinstance_or_issubclass(dtype, dtypes.Datetime): - dt_time_zone = getattr(dtype, "time_zone", None) + dt_time_zone = dtype.time_zone if dt_time_zone is None: return spark_types.TimestampNTZType() if dt_time_zone != "UTC": # pragma: no cover From 38812e63403754846de54ffcfa46e810b1220d15 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 17 Feb 2025 17:34:43 +0000 Subject: [PATCH 7/9] chore: remove comment moved to https://github.com/narwhals-dev/narwhals/pull/2025/files#r1958596925 --- narwhals/_polars/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/narwhals/_polars/utils.py b/narwhals/_polars/utils.py index 5366b06c78..79491760c5 100644 --- a/narwhals/_polars/utils.py +++ b/narwhals/_polars/utils.py @@ -192,7 +192,6 @@ def narwhals_to_native_dtype( msg = "Casting to Decimal is not supported yet." raise NotImplementedError(msg) if isinstance_or_issubclass(dtype, dtypes.Datetime): - # [s] is not allowed for polars return pl.Datetime(dtype.time_unit, dtype.time_zone) # type: ignore[arg-type] if isinstance_or_issubclass(dtype, dtypes.Duration): return pl.Duration(dtype.time_unit) # type: ignore[arg-type] From e8e9a97cbccaf5ef1384edb6f37b7fd3e91d9cd9 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 17 Feb 2025 17:49:30 +0000 Subject: [PATCH 8/9] refactor: simplify `__eq__` The metaclass is much narrower than `type` previously --- narwhals/dtypes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index ac2aaa0236..e336958277 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -519,7 +519,7 @@ def __init__( def __eq__(self: Self, other: object) -> bool: # allow comparing object instances to class - if type(other) is _DatetimeMeta and issubclass(other, self.__class__): + if type(other) is _DatetimeMeta: return True elif isinstance(other, self.__class__): return self.time_unit == other.time_unit and self.time_zone == other.time_zone @@ -580,7 +580,7 @@ def __init__(self: Self, time_unit: TimeUnit = "us") -> None: def __eq__(self: Self, other: object) -> bool: # allow comparing object instances to class - if type(other) is _DurationMeta and issubclass(other, self.__class__): + if type(other) is _DurationMeta: return True elif isinstance(other, self.__class__): return self.time_unit == other.time_unit From 50a43f3261d6c01923abd6e27037229a837eb44a Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 19 Feb 2025 12:41:30 +0000 Subject: [PATCH 9/9] fix: maybe fix typo "dt_time_unit" Fixes https://github.com/narwhals-dev/narwhals/pull/2025#discussion_r1961563952 --- narwhals/_pandas_like/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index a16f5014a7..8bb4317934 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -628,12 +628,12 @@ def narwhals_to_native_dtype( # noqa: PLR0915 tz_part = f", {tz}" if (tz := dtype.time_zone) else "" return f"datetime64[{dt_time_unit}{tz_part}]" if isinstance_or_issubclass(dtype, dtypes.Duration): - du_time_unit = dtype.time_unit if implementation is Implementation.PANDAS and backend_version < ( 2, ): # pragma: no cover - # NOTE: I think this assignment was a typo, it never gets used (should be du_) - dt_time_unit = "ns" + du_time_unit = "ns" + else: + du_time_unit = dtype.time_unit return ( f"duration[{du_time_unit}][pyarrow]" if dtype_backend == "pyarrow"