diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index 7bdfc34692..7032bb7c4b 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -13,8 +13,8 @@ import pyarrow as pa import pyarrow.compute as pc +from narwhals._compliant.series import _SeriesNamespace from narwhals.exceptions import ShapeError -from narwhals.utils import _SeriesNamespace from narwhals.utils import import_dtypes_module from narwhals.utils import isinstance_or_issubclass diff --git a/narwhals/_compliant/expr.py b/narwhals/_compliant/expr.py index 7cc4db4c97..ff659799ca 100644 --- a/narwhals/_compliant/expr.py +++ b/narwhals/_compliant/expr.py @@ -21,6 +21,7 @@ from narwhals._compliant.namespace import CompliantNamespace from narwhals._compliant.typing import AliasName from narwhals._compliant.typing import AliasNames +from narwhals._compliant.typing import CompliantExprT_co from narwhals._compliant.typing import CompliantFrameT from narwhals._compliant.typing import CompliantLazyFrameT from narwhals._compliant.typing import CompliantSeriesOrNativeExprT_co @@ -32,7 +33,7 @@ from narwhals.dependencies import get_numpy from narwhals.dependencies import is_numpy_array from narwhals.dtypes import DType -from narwhals.utils import _ExprNamespace +from narwhals.utils import _StoresCompliant from narwhals.utils import deprecated from narwhals.utils import not_implemented from narwhals.utils import unstable @@ -891,6 +892,16 @@ def _is_expr(cls, obj: Self | Any) -> TypeIs[Self]: return hasattr(obj, "__narwhals_expr__") +class _ExprNamespace( # type: ignore[misc] + _StoresCompliant[CompliantExprT_co], Protocol[CompliantExprT_co] +): + _compliant_expr: CompliantExprT_co + + @property + def compliant(self) -> CompliantExprT_co: + return self._compliant_expr + + class EagerExprNamespace(_ExprNamespace[EagerExprT], Generic[EagerExprT]): def __init__(self, expr: EagerExprT, /) -> None: self._compliant_expr = expr diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index 6d4ed55972..9b7146da15 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING from typing import Any +from typing import Generic from typing import Iterable from typing import Iterator from typing import Literal @@ -9,9 +10,18 @@ from typing import Protocol from typing import Sequence +from narwhals._compliant.any_namespace import CatNamespace +from narwhals._compliant.any_namespace import DateTimeNamespace +from narwhals._compliant.any_namespace import ListNamespace +from narwhals._compliant.any_namespace import StringNamespace +from narwhals._compliant.any_namespace import StructNamespace +from narwhals._compliant.typing import CompliantSeriesT_co +from narwhals._compliant.typing import EagerSeriesT_co from narwhals._compliant.typing import NativeSeriesT_co from narwhals._translate import FromIterable from narwhals._translate import NumpyConvertible +from narwhals.utils import _StoresCompliant +from narwhals.utils import _StoresNative from narwhals.utils import unstable if TYPE_CHECKING: @@ -297,3 +307,78 @@ def __narwhals_namespace__(self) -> EagerNamespace[Any, Self, Any]: ... def _to_expr(self) -> EagerExpr[Any, Any]: return self.__narwhals_namespace__()._expr._from_series(self) # type: ignore[no-any-return] + + @property + def str(self) -> EagerSeriesStringNamespace[Self, NativeSeriesT_co]: ... + @property + def dt(self) -> EagerSeriesDateTimeNamespace[Self, NativeSeriesT_co]: ... + @property + def cat(self) -> EagerSeriesCatNamespace[Self, NativeSeriesT_co]: ... + @property + def list(self) -> EagerSeriesListNamespace[Self, NativeSeriesT_co]: ... + @property + def struct(self) -> EagerSeriesStructNamespace[Self, NativeSeriesT_co]: ... + + +class _SeriesNamespace( # type: ignore[misc] + _StoresCompliant[CompliantSeriesT_co], + _StoresNative[NativeSeriesT_co], + Protocol[CompliantSeriesT_co, NativeSeriesT_co], +): + _compliant_series: CompliantSeriesT_co + + @property + def compliant(self) -> CompliantSeriesT_co: + return self._compliant_series + + @property + def native(self) -> NativeSeriesT_co: + return self._compliant_series.native # type: ignore[no-any-return] + + def from_native(self, series: Any, /) -> CompliantSeriesT_co: + return self.compliant._from_native_series(series) + + +class EagerSeriesNamespace( + _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co], + Generic[EagerSeriesT_co, NativeSeriesT_co], +): + _compliant_series: EagerSeriesT_co + + def __init__(self, series: EagerSeriesT_co, /) -> None: + self._compliant_series = series + + +class EagerSeriesCatNamespace( # type: ignore[misc] + _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co], + CatNamespace[EagerSeriesT_co], + Protocol[EagerSeriesT_co, NativeSeriesT_co], +): ... + + +class EagerSeriesDateTimeNamespace( # type: ignore[misc] + _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co], + DateTimeNamespace[EagerSeriesT_co], + Protocol[EagerSeriesT_co, NativeSeriesT_co], +): ... + + +class EagerSeriesListNamespace( # type: ignore[misc] + _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co], + ListNamespace[EagerSeriesT_co], + Protocol[EagerSeriesT_co, NativeSeriesT_co], +): ... + + +class EagerSeriesStringNamespace( # type: ignore[misc] + _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co], + StringNamespace[EagerSeriesT_co], + Protocol[EagerSeriesT_co, NativeSeriesT_co], +): ... + + +class EagerSeriesStructNamespace( # type: ignore[misc] + _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co], + StructNamespace[EagerSeriesT_co], + Protocol[EagerSeriesT_co, NativeSeriesT_co], +): ... diff --git a/narwhals/_compliant/typing.py b/narwhals/_compliant/typing.py index fb54ed12f2..5b058f3c41 100644 --- a/narwhals/_compliant/typing.py +++ b/narwhals/_compliant/typing.py @@ -59,10 +59,14 @@ NativeFrameT_co = TypeVar("NativeFrameT_co", bound="NativeFrame", covariant=True) CompliantExprT = TypeVar("CompliantExprT", bound=CompliantExprAny) +CompliantExprT_co = TypeVar("CompliantExprT_co", bound=CompliantExprAny, covariant=True) CompliantExprT_contra = TypeVar( "CompliantExprT_contra", bound=CompliantExprAny, contravariant=True ) CompliantSeriesT = TypeVar("CompliantSeriesT", bound=CompliantSeriesAny) +CompliantSeriesT_co = TypeVar( + "CompliantSeriesT_co", bound=CompliantSeriesAny, covariant=True +) CompliantSeriesOrNativeExprT = TypeVar( "CompliantSeriesOrNativeExprT", bound=CompliantSeriesOrNativeExprAny ) diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 1151a3eeb7..7d9253bb07 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -1079,8 +1079,14 @@ def cat(self: Self) -> PandasLikeSeriesCatNamespace: @property def list(self: Self) -> PandasLikeSeriesListNamespace: + if not hasattr(self.native, "list"): + msg = "Series must be of PyArrow List type to support list namespace." + raise TypeError(msg) return PandasLikeSeriesListNamespace(self) @property def struct(self: Self) -> PandasLikeSeriesStructNamespace: + if not hasattr(self.native, "struct"): + msg = "Series must be of PyArrow Struct type to support struct namespace." + raise TypeError(msg) return PandasLikeSeriesStructNamespace(self) diff --git a/narwhals/_pandas_like/series_cat.py b/narwhals/_pandas_like/series_cat.py index 47e6178650..edf1f13781 100644 --- a/narwhals/_pandas_like/series_cat.py +++ b/narwhals/_pandas_like/series_cat.py @@ -2,18 +2,16 @@ from typing import TYPE_CHECKING -if TYPE_CHECKING: - from typing_extensions import Self +from narwhals._compliant.any_namespace import CatNamespace +from narwhals._pandas_like.utils import PandasLikeSeriesNamespace +if TYPE_CHECKING: from narwhals._pandas_like.series import PandasLikeSeries -class PandasLikeSeriesCatNamespace: - def __init__(self: Self, series: PandasLikeSeries) -> None: - self._compliant_series = series - - def get_categories(self: Self) -> PandasLikeSeries: - s = self._compliant_series._native_series - return self._compliant_series._from_native_series( - s.__class__(s.cat.categories, name=s.name) - ) +class PandasLikeSeriesCatNamespace( + PandasLikeSeriesNamespace, CatNamespace["PandasLikeSeries"] +): + def get_categories(self) -> PandasLikeSeries: + s = self.native + return self.from_native(type(s)(s.cat.categories, name=s.name)) diff --git a/narwhals/_pandas_like/series_dt.py b/narwhals/_pandas_like/series_dt.py index a30a7e7908..901e7ffba4 100644 --- a/narwhals/_pandas_like/series_dt.py +++ b/narwhals/_pandas_like/series_dt.py @@ -3,27 +3,24 @@ from typing import TYPE_CHECKING from typing import Any +from narwhals._compliant.any_namespace import DateTimeNamespace +from narwhals._pandas_like.utils import PandasLikeSeriesNamespace from narwhals._pandas_like.utils import calculate_timestamp_date from narwhals._pandas_like.utils import calculate_timestamp_datetime from narwhals._pandas_like.utils import int_dtype_mapper -from narwhals.utils import Implementation +from narwhals._pandas_like.utils import is_pyarrow_dtype_backend from narwhals.utils import import_dtypes_module if TYPE_CHECKING: - from typing_extensions import Self - from narwhals._pandas_like.series import PandasLikeSeries from narwhals.typing import TimeUnit -class PandasLikeSeriesDateTimeNamespace: - def __init__(self: Self, series: PandasLikeSeries) -> None: - self._compliant_series = series - - def date(self: Self) -> PandasLikeSeries: - result = self._compliant_series._from_native_series( - self._compliant_series._native_series.dt.date, - ) +class PandasLikeSeriesDateTimeNamespace( + PandasLikeSeriesNamespace, DateTimeNamespace["PandasLikeSeries"] +): + def date(self) -> PandasLikeSeries: + result = self.from_native(self.native.dt.date) if str(result.dtype).lower() == "object": msg = ( "Accessing `date` on the default pandas backend " @@ -35,203 +32,160 @@ def date(self: Self) -> PandasLikeSeries: raise NotImplementedError(msg) return result - def year(self: Self) -> PandasLikeSeries: - return self._compliant_series._from_native_series( - self._compliant_series._native_series.dt.year, - ) + def year(self) -> PandasLikeSeries: + return self.from_native(self.native.dt.year) - def month(self: Self) -> PandasLikeSeries: - return self._compliant_series._from_native_series( - self._compliant_series._native_series.dt.month, - ) + def month(self) -> PandasLikeSeries: + return self.from_native(self.native.dt.month) - def day(self: Self) -> PandasLikeSeries: - return self._compliant_series._from_native_series( - self._compliant_series._native_series.dt.day, - ) + def day(self) -> PandasLikeSeries: + return self.from_native(self.native.dt.day) - def hour(self: Self) -> PandasLikeSeries: - return self._compliant_series._from_native_series( - self._compliant_series._native_series.dt.hour, - ) + def hour(self) -> PandasLikeSeries: + return self.from_native(self.native.dt.hour) - def minute(self: Self) -> PandasLikeSeries: - return self._compliant_series._from_native_series( - self._compliant_series._native_series.dt.minute, - ) + def minute(self) -> PandasLikeSeries: + return self.from_native(self.native.dt.minute) - def second(self: Self) -> PandasLikeSeries: - return self._compliant_series._from_native_series( - self._compliant_series._native_series.dt.second, - ) + def second(self) -> PandasLikeSeries: + return self.from_native(self.native.dt.second) - def millisecond(self: Self) -> PandasLikeSeries: + def millisecond(self) -> PandasLikeSeries: return self.microsecond() // 1000 - def microsecond(self: Self) -> PandasLikeSeries: - if self._compliant_series._backend_version < (3, 0, 0) and "pyarrow" in str( - self._compliant_series._native_series.dtype - ): + def microsecond(self) -> PandasLikeSeries: + if self.backend_version < (3, 0, 0) and self._is_pyarrow(): # crazy workaround for https://github.com/pandas-dev/pandas/issues/59154 import pyarrow.compute as pc # ignore-banned-import() - native_series = self._compliant_series._native_series - arr = native_series.array.__arrow_array__() + arr_ns = self.native.array + arr = arr_ns.__arrow_array__() result_arr = pc.add( pc.multiply(pc.millisecond(arr), 1000), pc.microsecond(arr) ) - result = native_series.__class__( - native_series.array.__class__(result_arr), name=native_series.name - ) - return self._compliant_series._from_native_series(result) + result = type(self.native)(type(arr_ns)(result_arr), name=self.native.name) + return self.from_native(result) - return self._compliant_series._from_native_series( - self._compliant_series._native_series.dt.microsecond - ) + return self.from_native(self.native.dt.microsecond) - def nanosecond(self: Self) -> PandasLikeSeries: - return ( - self.microsecond() * 1_000 - + self._compliant_series._native_series.dt.nanosecond - ) + def nanosecond(self) -> PandasLikeSeries: + return self.microsecond() * 1_000 + self.native.dt.nanosecond - def ordinal_day(self: Self) -> PandasLikeSeries: - ser = self._compliant_series._native_series - year_start = ser.dt.year + def ordinal_day(self) -> PandasLikeSeries: + year_start = self.native.dt.year result = ( - ser.to_numpy().astype("datetime64[D]") + self.native.to_numpy().astype("datetime64[D]") - (year_start.to_numpy() - 1970).astype("datetime64[Y]") ).astype("int32") + 1 - dtype = "Int64[pyarrow]" if "pyarrow" in str(ser.dtype) else "int32" - return self._compliant_series._from_native_series( - self._compliant_series._native_series.__class__( - result, dtype=dtype, name=year_start.name - ) + dtype = "Int64[pyarrow]" if self._is_pyarrow() else "int32" + return self.from_native( + type(self.native)(result, dtype=dtype, name=year_start.name) ) - def weekday(self: Self) -> PandasLikeSeries: - return ( - self._compliant_series._from_native_series( - self._compliant_series._native_series.dt.weekday, - ) - + 1 # Pandas is 0-6 while Polars is 1-7 - ) + def weekday(self) -> PandasLikeSeries: + # Pandas is 0-6 while Polars is 1-7 + return self.from_native(self.native.dt.weekday) + 1 - def _get_total_seconds(self: Self) -> Any: - if hasattr(self._compliant_series._native_series.dt, "total_seconds"): - return self._compliant_series._native_series.dt.total_seconds() + def _is_pyarrow(self) -> bool: + return is_pyarrow_dtype_backend(self.native.dtype, self.implementation) + + def _get_total_seconds(self) -> Any: + if hasattr(self.native.dt, "total_seconds"): + return self.native.dt.total_seconds() else: # pragma: no cover return ( - self._compliant_series._native_series.dt.days * 86400 - + self._compliant_series._native_series.dt.seconds - + (self._compliant_series._native_series.dt.microseconds / 1e6) - + (self._compliant_series._native_series.dt.nanoseconds / 1e9) + self.native.dt.days * 86400 + + self.native.dt.seconds + + (self.native.dt.microseconds / 1e6) + + (self.native.dt.nanoseconds / 1e9) ) - def total_minutes(self: Self) -> PandasLikeSeries: + def total_minutes(self) -> PandasLikeSeries: s = self._get_total_seconds() - s_sign = ( - 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 - ) # this calculates the sign of each series element + # this calculates the sign of each series element + s_sign = 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 s_abs = s.abs() // 60 if ~s.isna().any(): s_abs = s_abs.astype(int_dtype_mapper(s.dtype)) - return self._compliant_series._from_native_series(s_abs * s_sign) + return self.from_native(s_abs * s_sign) - def total_seconds(self: Self) -> PandasLikeSeries: + def total_seconds(self) -> PandasLikeSeries: s = self._get_total_seconds() - s_sign = ( - 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 - ) # this calculates the sign of each series element + # this calculates the sign of each series element + s_sign = 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 s_abs = s.abs() // 1 if ~s.isna().any(): s_abs = s_abs.astype(int_dtype_mapper(s.dtype)) - return self._compliant_series._from_native_series(s_abs * s_sign) + return self.from_native(s_abs * s_sign) - def total_milliseconds(self: Self) -> PandasLikeSeries: + def total_milliseconds(self) -> PandasLikeSeries: s = self._get_total_seconds() * 1e3 - s_sign = ( - 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 - ) # this calculates the sign of each series element + # this calculates the sign of each series element + s_sign = 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 s_abs = s.abs() // 1 if ~s.isna().any(): s_abs = s_abs.astype(int_dtype_mapper(s.dtype)) - return self._compliant_series._from_native_series(s_abs * s_sign) + return self.from_native(s_abs * s_sign) - def total_microseconds(self: Self) -> PandasLikeSeries: + def total_microseconds(self) -> PandasLikeSeries: s = self._get_total_seconds() * 1e6 - s_sign = ( - 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 - ) # this calculates the sign of each series element + # this calculates the sign of each series element + s_sign = 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 s_abs = s.abs() // 1 if ~s.isna().any(): s_abs = s_abs.astype(int_dtype_mapper(s.dtype)) - return self._compliant_series._from_native_series(s_abs * s_sign) + return self.from_native(s_abs * s_sign) - def total_nanoseconds(self: Self) -> PandasLikeSeries: + def total_nanoseconds(self) -> PandasLikeSeries: s = self._get_total_seconds() * 1e9 - s_sign = ( - 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 - ) # this calculates the sign of each series element + # this calculates the sign of each series element + s_sign = 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 s_abs = s.abs() // 1 if ~s.isna().any(): s_abs = s_abs.astype(int_dtype_mapper(s.dtype)) - return self._compliant_series._from_native_series(s_abs * s_sign) + return self.from_native(s_abs * s_sign) - def to_string(self: Self, format: str) -> PandasLikeSeries: + def to_string(self, format: str) -> PandasLikeSeries: # Polars' parser treats `'%.f'` as pandas does `'.%f'` # PyArrow interprets `'%S'` as "seconds, plus fractional seconds" # and doesn't support `%f` - if "pyarrow" not in str(self._compliant_series._native_series.dtype): + if not self._is_pyarrow(): format = format.replace("%S%.f", "%S.%f") else: format = format.replace("%S.%f", "%S").replace("%S%.f", "%S") - return self._compliant_series._from_native_series( - self._compliant_series._native_series.dt.strftime(format) - ) + return self.from_native(self.native.dt.strftime(format)) - def replace_time_zone(self: Self, time_zone: str | None) -> PandasLikeSeries: - if time_zone is not None: - result = self._compliant_series._native_series.dt.tz_localize( - None - ).dt.tz_localize(time_zone) - else: - result = self._compliant_series._native_series.dt.tz_localize(None) - return self._compliant_series._from_native_series(result) - - def convert_time_zone(self: Self, time_zone: str) -> PandasLikeSeries: - if self._compliant_series.dtype.time_zone is None: # type: ignore[attr-defined] - result = self._compliant_series._native_series.dt.tz_localize( - "UTC" - ).dt.tz_convert(time_zone) + def replace_time_zone(self, time_zone: str | None) -> PandasLikeSeries: + de_zone = self.native.dt.tz_localize(None) + result = de_zone.dt.tz_localize(time_zone) if time_zone is not None else de_zone + return self.from_native(result) + + def convert_time_zone(self, time_zone: str) -> PandasLikeSeries: + if self.compliant.dtype.time_zone is None: # type: ignore[attr-defined] + result = self.native.dt.tz_localize("UTC").dt.tz_convert(time_zone) else: - result = self._compliant_series._native_series.dt.tz_convert(time_zone) - return self._compliant_series._from_native_series(result) + result = self.native.dt.tz_convert(time_zone) + return self.from_native(result) - def timestamp(self: Self, time_unit: TimeUnit) -> PandasLikeSeries: - s = self._compliant_series._native_series - dtype = self._compliant_series.dtype - is_pyarrow_dtype = "pyarrow" in str(self._compliant_series._native_series.dtype) + def timestamp(self, time_unit: TimeUnit) -> PandasLikeSeries: + s = self.native + dtype = self.compliant.dtype mask_na = s.isna() - dtypes = import_dtypes_module(self._compliant_series._version) + dtypes = import_dtypes_module(self.version) if dtype == dtypes.Date: # Date is only supported in pandas dtypes if pyarrow-backed s_cast = s.astype("Int32[pyarrow]") result = calculate_timestamp_date(s_cast, time_unit) elif isinstance(dtype, dtypes.Datetime): - original_time_unit = dtype.time_unit - if ( - self._compliant_series._implementation is Implementation.PANDAS - and self._compliant_series._backend_version < (2,) - ): # pragma: no cover - s_cast = s.view("Int64[pyarrow]") if is_pyarrow_dtype else s.view("int64") - else: - s_cast = ( - s.astype("Int64[pyarrow]") if is_pyarrow_dtype else s.astype("int64") - ) - result = calculate_timestamp_datetime(s_cast, original_time_unit, time_unit) + fn = ( + s.view + if (self.implementation.is_pandas() and self.backend_version < (2,)) + else s.astype + ) + s_cast = fn("Int64[pyarrow]") if self._is_pyarrow() else fn("int64") + result = calculate_timestamp_datetime(s_cast, dtype.time_unit, time_unit) else: msg = "Input should be either of Date or Datetime type" raise TypeError(msg) result[mask_na] = None - return self._compliant_series._from_native_series(result) + return self.from_native(result) diff --git a/narwhals/_pandas_like/series_list.py b/narwhals/_pandas_like/series_list.py index da3eea42a3..47be564874 100644 --- a/narwhals/_pandas_like/series_list.py +++ b/narwhals/_pandas_like/series_list.py @@ -2,51 +2,36 @@ from typing import TYPE_CHECKING +from narwhals._compliant.any_namespace import ListNamespace +from narwhals._pandas_like.utils import PandasLikeSeriesNamespace from narwhals._pandas_like.utils import get_dtype_backend from narwhals._pandas_like.utils import narwhals_to_native_dtype from narwhals._pandas_like.utils import set_index -from narwhals.utils import Implementation from narwhals.utils import import_dtypes_module if TYPE_CHECKING: - from typing_extensions import Self - from narwhals._pandas_like.series import PandasLikeSeries -class PandasLikeSeriesListNamespace: - def __init__(self: Self, series: PandasLikeSeries) -> None: - if not hasattr(series._native_series, "list"): - msg = "Series must be of PyArrow List type to support list namespace." - raise TypeError(msg) - self._compliant_series = series - - def len(self: Self) -> PandasLikeSeries: - native_series = self._compliant_series._native_series - native_result = native_series.list.len() - - if ( - self._compliant_series._implementation is Implementation.PANDAS - and self._compliant_series._backend_version < (3, 0) - ): # pragma: no cover - native_result = set_index( - native_result, - index=native_series.index, - implementation=self._compliant_series._implementation, - backend_version=self._compliant_series._backend_version, +class PandasLikeSeriesListNamespace( + PandasLikeSeriesNamespace, ListNamespace["PandasLikeSeries"] +): + def len(self) -> PandasLikeSeries: + result = self.native.list.len() + implementation = self.implementation + backend_version = self.backend_version + if implementation.is_pandas() and backend_version < (3, 0): # pragma: no cover + result = set_index( + result, + self.native.index, + implementation=implementation, + backend_version=backend_version, ) - - implementation = self._compliant_series._implementation - dtype_backend = get_dtype_backend( - dtype=native_result.dtype, implementation=implementation - ) dtype = narwhals_to_native_dtype( - dtype=import_dtypes_module(self._compliant_series._version).UInt32(), - dtype_backend=dtype_backend, - implementation=implementation, - backend_version=self._compliant_series._backend_version, - version=self._compliant_series._version, + import_dtypes_module(self.version).UInt32(), + get_dtype_backend(result.dtype, implementation), + implementation, + backend_version, + self.version, ) - return self._compliant_series._from_native_series( - native_result.astype(dtype) - ).alias(native_series.name) + return self.from_native(result.astype(dtype)).alias(self.native.name) diff --git a/narwhals/_pandas_like/series_str.py b/narwhals/_pandas_like/series_str.py index 68a8134ffe..ef093241fc 100644 --- a/narwhals/_pandas_like/series_str.py +++ b/narwhals/_pandas_like/series_str.py @@ -1,114 +1,78 @@ from __future__ import annotations from typing import TYPE_CHECKING +from typing import Any -from narwhals._pandas_like.utils import get_dtype_backend -from narwhals._pandas_like.utils import to_datetime -from narwhals.utils import Implementation +from narwhals._compliant.any_namespace import StringNamespace +from narwhals._pandas_like.utils import PandasLikeSeriesNamespace +from narwhals._pandas_like.utils import is_pyarrow_dtype_backend if TYPE_CHECKING: - from typing_extensions import Self - from narwhals._pandas_like.series import PandasLikeSeries -class PandasLikeSeriesStringNamespace: - def __init__(self: Self, series: PandasLikeSeries) -> None: - self._compliant_series = series - - def len_chars(self: Self) -> PandasLikeSeries: - return self._compliant_series._from_native_series( - self._compliant_series._native_series.str.len() - ) +class PandasLikeSeriesStringNamespace( + PandasLikeSeriesNamespace, StringNamespace["PandasLikeSeries"] +): + def len_chars(self) -> PandasLikeSeries: + return self.from_native(self.native.str.len()) def replace( - self: Self, pattern: str, value: str, *, literal: bool, n: int + self, pattern: str, value: str, *, literal: bool, n: int ) -> PandasLikeSeries: - return self._compliant_series._from_native_series( - self._compliant_series._native_series.str.replace( - pat=pattern, repl=value, n=n, regex=not literal - ), + return self.from_native( + self.native.str.replace(pat=pattern, repl=value, n=n, regex=not literal) ) - def replace_all( - self: Self, pattern: str, value: str, *, literal: bool - ) -> PandasLikeSeries: + def replace_all(self, pattern: str, value: str, *, literal: bool) -> PandasLikeSeries: return self.replace(pattern, value, literal=literal, n=-1) - def strip_chars(self: Self, characters: str | None) -> PandasLikeSeries: - return self._compliant_series._from_native_series( - self._compliant_series._native_series.str.strip(characters), - ) + def strip_chars(self, characters: str | None) -> PandasLikeSeries: + return self.from_native(self.native.str.strip(characters)) - def starts_with(self: Self, prefix: str) -> PandasLikeSeries: - return self._compliant_series._from_native_series( - self._compliant_series._native_series.str.startswith(prefix), - ) + def starts_with(self, prefix: str) -> PandasLikeSeries: + return self.from_native(self.native.str.startswith(prefix)) - def ends_with(self: Self, suffix: str) -> PandasLikeSeries: - return self._compliant_series._from_native_series( - self._compliant_series._native_series.str.endswith(suffix), - ) + def ends_with(self, suffix: str) -> PandasLikeSeries: + return self.from_native(self.native.str.endswith(suffix)) - def contains(self: Self, pattern: str, *, literal: bool) -> PandasLikeSeries: - return self._compliant_series._from_native_series( - self._compliant_series._native_series.str.contains( - pat=pattern, regex=not literal - ) - ) + def contains(self, pattern: str, *, literal: bool) -> PandasLikeSeries: + return self.from_native(self.native.str.contains(pat=pattern, regex=not literal)) - def slice(self: Self, offset: int, length: int | None) -> PandasLikeSeries: + def slice(self, offset: int, length: int | None) -> PandasLikeSeries: stop = offset + length if length else None - return self._compliant_series._from_native_series( - self._compliant_series._native_series.str.slice(start=offset, stop=stop), - ) - - def split(self: Self, by: str) -> PandasLikeSeries: - if ( - self._compliant_series._implementation is not Implementation.CUDF - ): # pragma: no cover - dtype_backend = get_dtype_backend( - self._compliant_series._native_series.dtype, - self._compliant_series._implementation, - ) - if dtype_backend != "pyarrow": - msg = ( - "This operation requires a pyarrow-backed series. " - "Please refer to https://narwhals-dev.github.io/narwhals/api-reference/narwhals/#narwhals.maybe_convert_dtypes " - "and ensure you are using dtype_backend='pyarrow'. " - "Additionally, make sure you have pandas version 1.5+ and pyarrow installed. " - ) - raise TypeError(msg) - - return self._compliant_series._from_native_series( - self._compliant_series._native_series.str.split(pat=by), - ) - - def to_datetime(self: Self, format: str | None) -> PandasLikeSeries: - if format is not None and any(x in format for x in ("%z", "Z")): - # We know that the inputs are timezone-aware, so we can directly pass - # `utc=True` for better performance. - return self._compliant_series._from_native_series( - to_datetime(self._compliant_series._implementation, utc=True)( - self._compliant_series._native_series, format=format - ) + return self.from_native(self.native.str.slice(start=offset, stop=stop)) + + def split(self, by: str) -> PandasLikeSeries: + implementation = self.implementation + if not implementation.is_cudf() and not is_pyarrow_dtype_backend( + self.native.dtype, implementation + ): + msg = ( + "This operation requires a pyarrow-backed series. " + "Please refer to https://narwhals-dev.github.io/narwhals/api-reference/narwhals/#narwhals.maybe_convert_dtypes " + "and ensure you are using dtype_backend='pyarrow'. " + "Additionally, make sure you have pandas version 1.5+ and pyarrow installed. " ) - result = self._compliant_series._from_native_series( - to_datetime(self._compliant_series._implementation, utc=False)( - self._compliant_series._native_series, format=format - ) - ) - result_time_zone = result.dtype.time_zone # type: ignore[attr-defined] - if result_time_zone is not None and result_time_zone != "UTC": - result = result.dt.convert_time_zone("UTC") + raise TypeError(msg) + return self.from_native(self.native.str.split(pat=by)) + + def to_datetime(self, format: str | None) -> PandasLikeSeries: + # If we know inputs are timezone-aware, we can pass `utc=True` for better performance. + if format and any(x in format for x in ("%z", "Z")): + return self.from_native(self._to_datetime(format, utc=True)) + result = self.from_native(self._to_datetime(format, utc=False)) + if (tz := getattr(result.dtype, "time_zone", None)) and tz != "UTC": + return result.dt.convert_time_zone("UTC") return result - def to_uppercase(self: Self) -> PandasLikeSeries: - return self._compliant_series._from_native_series( - self._compliant_series._native_series.str.upper(), + def _to_datetime(self, format: str | None, *, utc: bool) -> Any: + return self.implementation.to_native_namespace().to_datetime( + self.native, format=format, utc=utc ) - def to_lowercase(self: Self) -> PandasLikeSeries: - return self._compliant_series._from_native_series( - self._compliant_series._native_series.str.lower(), - ) + def to_uppercase(self) -> PandasLikeSeries: + return self.from_native(self.native.str.upper()) + + def to_lowercase(self) -> PandasLikeSeries: + return self.from_native(self.native.str.lower()) diff --git a/narwhals/_pandas_like/series_struct.py b/narwhals/_pandas_like/series_struct.py index a9d602b0f2..e3b2a2056b 100644 --- a/narwhals/_pandas_like/series_struct.py +++ b/narwhals/_pandas_like/series_struct.py @@ -2,20 +2,15 @@ from typing import TYPE_CHECKING -if TYPE_CHECKING: - from typing_extensions import Self +from narwhals._compliant.any_namespace import StructNamespace +from narwhals._pandas_like.utils import PandasLikeSeriesNamespace +if TYPE_CHECKING: from narwhals._pandas_like.series import PandasLikeSeries -class PandasLikeSeriesStructNamespace: - def __init__(self: Self, series: PandasLikeSeries) -> None: - if not hasattr(series._native_series, "struct"): - msg = "Series must be of PyArrow Struct type to support struct namespace." - raise TypeError(msg) - self._compliant_series = series - - def field(self: Self, name: str) -> PandasLikeSeries: - return self._compliant_series._from_native_series( - self._compliant_series._native_series.struct.field(name) - ).alias(name) +class PandasLikeSeriesStructNamespace( + PandasLikeSeriesNamespace, StructNamespace["PandasLikeSeries"] +): + def field(self, name: str) -> PandasLikeSeries: + return self.from_native(self.native.struct.field(name)).alias(name) diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index d4069611d1..ce95b6f878 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -13,6 +13,7 @@ import pandas as pd +from narwhals._compliant.series import EagerSeriesNamespace from narwhals.exceptions import ColumnNotFoundError from narwhals.exceptions import DuplicateError from narwhals.exceptions import ShapeError @@ -450,6 +451,11 @@ def get_dtype_backend(dtype: Any, implementation: Implementation) -> DTypeBacken return None +@functools.lru_cache(maxsize=16) +def is_pyarrow_dtype_backend(dtype: Any, implementation: Implementation) -> bool: + return get_dtype_backend(dtype, implementation) == "pyarrow" + + def narwhals_to_native_dtype( # noqa: PLR0915 dtype: DType | type[DType], dtype_backend: DTypeBackend, @@ -646,17 +652,6 @@ def align_series_full_broadcast( return reindexed -def to_datetime(implementation: Implementation, *, utc: bool) -> Any: - if implementation in PANDAS_LIKE_IMPLEMENTATION: - return functools.partial( - implementation.to_native_namespace().to_datetime, utc=utc - ) - - else: # pragma: no cover - msg = f"Expected pandas-like implementation ({PANDAS_LIKE_IMPLEMENTATION}), found {implementation}" - raise TypeError(msg) - - def int_dtype_mapper(dtype: Any) -> str: if "pyarrow" in str(dtype): return "Int64[pyarrow]" @@ -818,3 +813,17 @@ def check_column_names_are_unique(columns: pd.Index[str]) -> None: msg += f"\n- '{key}' {value} times" msg = f"Expected unique column names, got:{msg}" raise DuplicateError(msg) + + +class PandasLikeSeriesNamespace(EagerSeriesNamespace["PandasLikeSeries", Any]): + @property + def implementation(self) -> Implementation: + return self.compliant._implementation + + @property + def backend_version(self) -> tuple[int, ...]: + return self.compliant._backend_version + + @property + def version(self) -> Version: + return self.compliant._version diff --git a/narwhals/utils.py b/narwhals/utils.py index fe5ad29ccd..ff66c8aba9 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -57,9 +57,11 @@ from narwhals._arrow.namespace import ArrowNamespace from narwhals._compliant import CompliantExpr + from narwhals._compliant import CompliantExprT from narwhals._compliant import CompliantFrameT from narwhals._compliant import CompliantNamespace from narwhals._compliant import CompliantSeriesOrNativeExprT_co + from narwhals._compliant import CompliantSeriesT from narwhals._compliant import NativeFrameT_co from narwhals._compliant import NativeSeriesT_co from narwhals._dask.namespace import DaskNamespace @@ -146,15 +148,9 @@ def columns(self) -> Sequence[str]: ... NativeT_co = TypeVar("NativeT_co", covariant=True) CompliantT_co = TypeVar("CompliantT_co", covariant=True) -CompliantExprT_co = TypeVar( - "CompliantExprT_co", bound="CompliantExpr[Any, Any]", covariant=True -) -CompliantSeriesT_co = TypeVar( - "CompliantSeriesT_co", bound="CompliantSeries[Any]", covariant=True -) -class _StoresNative(Protocol[NativeT_co]): +class _StoresNative(Protocol[NativeT_co]): # noqa: PYI046 """Provides access to a native object. Native objects have types like: @@ -169,7 +165,7 @@ def native(self) -> NativeT_co: ... -class _StoresCompliant(Protocol[CompliantT_co]): +class _StoresCompliant(Protocol[CompliantT_co]): # noqa: PYI046 """Provides access to a compliant object. Compliant objects have types like: @@ -184,35 +180,6 @@ def compliant(self) -> CompliantT_co: ... -class _SeriesNamespace( # type: ignore[misc] # noqa: PYI046 - _StoresCompliant[CompliantSeriesT_co], - _StoresNative[NativeT_co], - Protocol[CompliantSeriesT_co, NativeT_co], -): - _compliant_series: CompliantSeriesT_co - - @property - def compliant(self) -> CompliantSeriesT_co: - return self._compliant_series - - @property - def native(self) -> NativeT_co: - return self._compliant_series.native - - def from_native(self, series: Any, /) -> CompliantSeriesT_co: - return self.compliant._from_native_series(series) - - -class _ExprNamespace( # type: ignore[misc] # noqa: PYI046 - _StoresCompliant[CompliantExprT_co], Protocol[CompliantExprT_co] -): - _compliant_expr: CompliantExprT_co - - @property - def compliant(self) -> CompliantExprT_co: - return self._compliant_expr - - class Version(Enum): V1 = auto() MAIN = auto() @@ -1555,15 +1522,14 @@ def _hasattr_static(obj: Any, attr: str) -> bool: def is_compliant_dataframe( - obj: CompliantDataFrame[CompliantSeriesT_co, CompliantExprT_co, NativeFrameT_co] - | Any, -) -> TypeIs[CompliantDataFrame[CompliantSeriesT_co, CompliantExprT_co, NativeFrameT_co]]: + obj: CompliantDataFrame[CompliantSeriesT, CompliantExprT, NativeFrameT_co] | Any, +) -> TypeIs[CompliantDataFrame[CompliantSeriesT, CompliantExprT, NativeFrameT_co]]: return _hasattr_static(obj, "__narwhals_dataframe__") def is_compliant_lazyframe( - obj: CompliantLazyFrame[CompliantExprT_co, NativeFrameT_co] | Any, -) -> TypeIs[CompliantLazyFrame[CompliantExprT_co, NativeFrameT_co]]: + obj: CompliantLazyFrame[CompliantExprT, NativeFrameT_co] | Any, +) -> TypeIs[CompliantLazyFrame[CompliantExprT, NativeFrameT_co]]: return _hasattr_static(obj, "__narwhals_lazyframe__") diff --git a/utils/generate_backend_completeness.py b/utils/generate_backend_completeness.py index b8f2bb58f2..37f1843707 100644 --- a/utils/generate_backend_completeness.py +++ b/utils/generate_backend_completeness.py @@ -108,7 +108,9 @@ def render_table_and_write_to_output( results: pl.DataFrame = ( pl.concat(results) .with_columns(supported=pl.lit(":white_check_mark:")) - .pivot(on="Backend", values="supported", index=["Method"]) + .pivot( + on="Backend", values="supported", index=["Method"], aggregate_function="first" + ) .filter(pl.col("narwhals").is_not_null()) .drop("narwhals") .fill_null(":x:")