From bc94d5fa06bd2c961a06ee9e76aa5893bb1196bd Mon Sep 17 00:00:00 2001 From: olp-cs <162949+olp-cs@users.noreply.github.com> Date: Sun, 8 Dec 2024 13:44:15 +0000 Subject: [PATCH] chore: rename `series` and `expr` to `_compliant` for Arrow, Polars, and Dask --- narwhals/_arrow/expr.py | 170 +++++++++++++++++++----------------- narwhals/_arrow/series.py | 144 +++++++++++++++--------------- narwhals/_dask/expr.py | 174 +++++++++++++++++++------------------ narwhals/_polars/expr.py | 24 ++--- narwhals/_polars/series.py | 18 ++-- 5 files changed, 274 insertions(+), 256 deletions(-) diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index 02930f3920..17d24d86ed 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -516,11 +516,11 @@ def name(self: Self) -> ArrowExprNameNamespace: class ArrowExprCatNamespace: def __init__(self: Self, expr: ArrowExpr) -> None: - self._expr = expr + self._compliant_expr = expr def get_categories(self: Self) -> ArrowExpr: return reuse_series_namespace_implementation( - self._expr, + self._compliant_expr, "cat", "get_categories", ) @@ -528,89 +528,103 @@ def get_categories(self: Self) -> ArrowExpr: class ArrowExprDateTimeNamespace: def __init__(self: Self, expr: ArrowExpr) -> None: - self._expr = expr + self._compliant_expr = expr def to_string(self: Self, format: str) -> ArrowExpr: # noqa: A002 return reuse_series_namespace_implementation( - self._expr, "dt", "to_string", format + self._compliant_expr, "dt", "to_string", format ) def replace_time_zone(self: Self, time_zone: str | None) -> ArrowExpr: return reuse_series_namespace_implementation( - self._expr, "dt", "replace_time_zone", time_zone + self._compliant_expr, "dt", "replace_time_zone", time_zone ) def convert_time_zone(self: Self, time_zone: str) -> ArrowExpr: return reuse_series_namespace_implementation( - self._expr, "dt", "convert_time_zone", time_zone + self._compliant_expr, "dt", "convert_time_zone", time_zone ) def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> ArrowExpr: return reuse_series_namespace_implementation( - self._expr, "dt", "timestamp", time_unit + self._compliant_expr, "dt", "timestamp", time_unit ) def date(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self._expr, "dt", "date") + return reuse_series_namespace_implementation(self._compliant_expr, "dt", "date") def year(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self._expr, "dt", "year") + return reuse_series_namespace_implementation(self._compliant_expr, "dt", "year") def month(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self._expr, "dt", "month") + return reuse_series_namespace_implementation(self._compliant_expr, "dt", "month") def day(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self._expr, "dt", "day") + return reuse_series_namespace_implementation(self._compliant_expr, "dt", "day") def hour(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self._expr, "dt", "hour") + return reuse_series_namespace_implementation(self._compliant_expr, "dt", "hour") def minute(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self._expr, "dt", "minute") + return reuse_series_namespace_implementation(self._compliant_expr, "dt", "minute") def second(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self._expr, "dt", "second") + return reuse_series_namespace_implementation(self._compliant_expr, "dt", "second") def millisecond(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self._expr, "dt", "millisecond") + return reuse_series_namespace_implementation( + self._compliant_expr, "dt", "millisecond" + ) def microsecond(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self._expr, "dt", "microsecond") + return reuse_series_namespace_implementation( + self._compliant_expr, "dt", "microsecond" + ) def nanosecond(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self._expr, "dt", "nanosecond") + return reuse_series_namespace_implementation( + self._compliant_expr, "dt", "nanosecond" + ) def ordinal_day(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self._expr, "dt", "ordinal_day") + return reuse_series_namespace_implementation( + self._compliant_expr, "dt", "ordinal_day" + ) def total_minutes(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self._expr, "dt", "total_minutes") + return reuse_series_namespace_implementation( + self._compliant_expr, "dt", "total_minutes" + ) def total_seconds(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self._expr, "dt", "total_seconds") + return reuse_series_namespace_implementation( + self._compliant_expr, "dt", "total_seconds" + ) def total_milliseconds(self: Self) -> ArrowExpr: return reuse_series_namespace_implementation( - self._expr, "dt", "total_milliseconds" + self._compliant_expr, "dt", "total_milliseconds" ) def total_microseconds(self: Self) -> ArrowExpr: return reuse_series_namespace_implementation( - self._expr, "dt", "total_microseconds" + self._compliant_expr, "dt", "total_microseconds" ) def total_nanoseconds(self: Self) -> ArrowExpr: return reuse_series_namespace_implementation( - self._expr, "dt", "total_nanoseconds" + self._compliant_expr, "dt", "total_nanoseconds" ) class ArrowExprStringNamespace: def __init__(self: Self, expr: ArrowExpr) -> None: - self._expr = expr + self._compliant_expr = expr def len_chars(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation(self._expr, "str", "len_chars") + return reuse_series_namespace_implementation( + self._compliant_expr, "str", "len_chars" + ) def replace( self: Self, @@ -621,7 +635,7 @@ def replace( n: int, ) -> ArrowExpr: return reuse_series_namespace_implementation( - self._expr, + self._compliant_expr, "str", "replace", pattern, @@ -638,7 +652,7 @@ def replace_all( literal: bool, ) -> ArrowExpr: return reuse_series_namespace_implementation( - self._expr, + self._compliant_expr, "str", "replace_all", pattern, @@ -648,7 +662,7 @@ def replace_all( def strip_chars(self: Self, characters: str | None) -> ArrowExpr: return reuse_series_namespace_implementation( - self._expr, + self._compliant_expr, "str", "strip_chars", characters, @@ -656,7 +670,7 @@ def strip_chars(self: Self, characters: str | None) -> ArrowExpr: def starts_with(self: Self, prefix: str) -> ArrowExpr: return reuse_series_namespace_implementation( - self._expr, + self._compliant_expr, "str", "starts_with", prefix, @@ -664,7 +678,7 @@ def starts_with(self: Self, prefix: str) -> ArrowExpr: def ends_with(self: Self, suffix: str) -> ArrowExpr: return reuse_series_namespace_implementation( - self._expr, + self._compliant_expr, "str", "ends_with", suffix, @@ -672,17 +686,17 @@ def ends_with(self: Self, suffix: str) -> ArrowExpr: def contains(self, pattern: str, *, literal: bool) -> ArrowExpr: return reuse_series_namespace_implementation( - self._expr, "str", "contains", pattern, literal=literal + self._compliant_expr, "str", "contains", pattern, literal=literal ) def slice(self: Self, offset: int, length: int | None) -> ArrowExpr: return reuse_series_namespace_implementation( - self._expr, "str", "slice", offset, length + self._compliant_expr, "str", "slice", offset, length ) def to_datetime(self: Self, format: str | None) -> ArrowExpr: # noqa: A002 return reuse_series_namespace_implementation( - self._expr, + self._compliant_expr, "str", "to_datetime", format, @@ -690,14 +704,14 @@ def to_datetime(self: Self, format: str | None) -> ArrowExpr: # noqa: A002 def to_uppercase(self: Self) -> ArrowExpr: return reuse_series_namespace_implementation( - self._expr, + self._compliant_expr, "str", "to_uppercase", ) def to_lowercase(self: Self) -> ArrowExpr: return reuse_series_namespace_implementation( - self._expr, + self._compliant_expr, "str", "to_lowercase", ) @@ -705,10 +719,10 @@ def to_lowercase(self: Self) -> ArrowExpr: class ArrowExprNameNamespace: def __init__(self: Self, expr: ArrowExpr) -> None: - self._expr = expr + self._compliant_expr = expr def keep(self: Self) -> ArrowExpr: - root_names = self._expr._root_names + root_names = self._compliant_expr._root_names if root_names is None: msg = ( @@ -718,21 +732,21 @@ def keep(self: Self) -> ArrowExpr: ) raise ValueError(msg) - return self._expr.__class__( + return self._compliant_expr.__class__( lambda df: [ series.alias(name) - for series, name in zip(self._expr._call(df), root_names) + for series, name in zip(self._compliant_expr._call(df), root_names) ], - depth=self._expr._depth, - function_name=self._expr._function_name, + depth=self._compliant_expr._depth, + function_name=self._compliant_expr._function_name, root_names=root_names, output_names=root_names, - backend_version=self._expr._backend_version, - version=self._expr._version, + backend_version=self._compliant_expr._backend_version, + version=self._compliant_expr._version, ) def map(self: Self, function: Callable[[str], str]) -> ArrowExpr: - root_names = self._expr._root_names + root_names = self._compliant_expr._root_names if root_names is None: msg = ( @@ -744,21 +758,21 @@ def map(self: Self, function: Callable[[str], str]) -> ArrowExpr: output_names = [function(str(name)) for name in root_names] - return self._expr.__class__( + return self._compliant_expr.__class__( lambda df: [ series.alias(name) - for series, name in zip(self._expr._call(df), output_names) + for series, name in zip(self._compliant_expr._call(df), output_names) ], - depth=self._expr._depth, - function_name=self._expr._function_name, + depth=self._compliant_expr._depth, + function_name=self._compliant_expr._function_name, root_names=root_names, output_names=output_names, - backend_version=self._expr._backend_version, - version=self._expr._version, + backend_version=self._compliant_expr._backend_version, + version=self._compliant_expr._version, ) def prefix(self: Self, prefix: str) -> ArrowExpr: - root_names = self._expr._root_names + root_names = self._compliant_expr._root_names if root_names is None: msg = ( "Anonymous expressions are not supported in `.name.prefix`.\n" @@ -768,21 +782,21 @@ def prefix(self: Self, prefix: str) -> ArrowExpr: raise ValueError(msg) output_names = [prefix + str(name) for name in root_names] - return self._expr.__class__( + return self._compliant_expr.__class__( lambda df: [ series.alias(name) - for series, name in zip(self._expr._call(df), output_names) + for series, name in zip(self._compliant_expr._call(df), output_names) ], - depth=self._expr._depth, - function_name=self._expr._function_name, + depth=self._compliant_expr._depth, + function_name=self._compliant_expr._function_name, root_names=root_names, output_names=output_names, - backend_version=self._expr._backend_version, - version=self._expr._version, + backend_version=self._compliant_expr._backend_version, + version=self._compliant_expr._version, ) def suffix(self: Self, suffix: str) -> ArrowExpr: - root_names = self._expr._root_names + root_names = self._compliant_expr._root_names if root_names is None: msg = ( "Anonymous expressions are not supported in `.name.suffix`.\n" @@ -793,21 +807,21 @@ def suffix(self: Self, suffix: str) -> ArrowExpr: output_names = [str(name) + suffix for name in root_names] - return self._expr.__class__( + return self._compliant_expr.__class__( lambda df: [ series.alias(name) - for series, name in zip(self._expr._call(df), output_names) + for series, name in zip(self._compliant_expr._call(df), output_names) ], - depth=self._expr._depth, - function_name=self._expr._function_name, + depth=self._compliant_expr._depth, + function_name=self._compliant_expr._function_name, root_names=root_names, output_names=output_names, - backend_version=self._expr._backend_version, - version=self._expr._version, + backend_version=self._compliant_expr._backend_version, + version=self._compliant_expr._version, ) def to_lowercase(self: Self) -> ArrowExpr: - root_names = self._expr._root_names + root_names = self._compliant_expr._root_names if root_names is None: msg = ( @@ -818,21 +832,21 @@ def to_lowercase(self: Self) -> ArrowExpr: raise ValueError(msg) output_names = [str(name).lower() for name in root_names] - return self._expr.__class__( + return self._compliant_expr.__class__( lambda df: [ series.alias(name) - for series, name in zip(self._expr._call(df), output_names) + for series, name in zip(self._compliant_expr._call(df), output_names) ], - depth=self._expr._depth, - function_name=self._expr._function_name, + depth=self._compliant_expr._depth, + function_name=self._compliant_expr._function_name, root_names=root_names, output_names=output_names, - backend_version=self._expr._backend_version, - version=self._expr._version, + backend_version=self._compliant_expr._backend_version, + version=self._compliant_expr._version, ) def to_uppercase(self: Self) -> ArrowExpr: - root_names = self._expr._root_names + root_names = self._compliant_expr._root_names if root_names is None: msg = ( @@ -843,15 +857,15 @@ def to_uppercase(self: Self) -> ArrowExpr: raise ValueError(msg) output_names = [str(name).upper() for name in root_names] - return self._expr.__class__( + return self._compliant_expr.__class__( lambda df: [ series.alias(name) - for series, name in zip(self._expr._call(df), output_names) + for series, name in zip(self._compliant_expr._call(df), output_names) ], - depth=self._expr._depth, - function_name=self._expr._function_name, + depth=self._compliant_expr._depth, + function_name=self._compliant_expr._function_name, root_names=root_names, output_names=output_names, - backend_version=self._expr._backend_version, - version=self._expr._version, + backend_version=self._compliant_expr._backend_version, + version=self._compliant_expr._version, ) diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 51c1b9943b..e287be613d 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -1065,7 +1065,7 @@ def str(self: Self) -> ArrowSeriesStringNamespace: class ArrowSeriesDateTimeNamespace: def __init__(self: Self, series: ArrowSeries) -> None: - self._arrow_series = series + self._compliant_series = series def to_string(self: Self, format: str) -> ArrowSeries: # noqa: A002 import pyarrow.compute as pc @@ -1074,8 +1074,8 @@ def to_string(self: Self, format: str) -> ArrowSeries: # noqa: A002 # the fractional part of the second...:'( # https://arrow.apache.org/docs/python/generated/pyarrow.compute.strftime.html format = format.replace("%S.%f", "%S").replace("%S%.f", "%S") - return self._arrow_series._from_native_series( - pc.strftime(self._arrow_series._native_series, format) + return self._compliant_series._from_native_series( + pc.strftime(self._compliant_series._native_series, format) ) def replace_time_zone(self: Self, time_zone: str | None) -> ArrowSeries: @@ -1083,33 +1083,33 @@ def replace_time_zone(self: Self, time_zone: str | None) -> ArrowSeries: if time_zone is not None: result = pc.assume_timezone( - pc.local_timestamp(self._arrow_series._native_series), time_zone + pc.local_timestamp(self._compliant_series._native_series), time_zone ) else: - result = pc.local_timestamp(self._arrow_series._native_series) - return self._arrow_series._from_native_series(result) + result = pc.local_timestamp(self._compliant_series._native_series) + return self._compliant_series._from_native_series(result) def convert_time_zone(self: Self, time_zone: str) -> ArrowSeries: import pyarrow as pa - if self._arrow_series.dtype.time_zone is None: # type: ignore[attr-defined] + if self._compliant_series.dtype.time_zone is None: # type: ignore[attr-defined] result = self.replace_time_zone("UTC")._native_series.cast( - pa.timestamp(self._arrow_series._native_series.type.unit, time_zone) + pa.timestamp(self._compliant_series._native_series.type.unit, time_zone) ) else: - result = self._arrow_series._native_series.cast( - pa.timestamp(self._arrow_series._native_series.type.unit, time_zone) + result = self._compliant_series._native_series.cast( + pa.timestamp(self._compliant_series._native_series.type.unit, time_zone) ) - return self._arrow_series._from_native_series(result) + return self._compliant_series._from_native_series(result) def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> ArrowSeries: import pyarrow as pa import pyarrow.compute as pc - s = self._arrow_series._native_series - dtype = self._arrow_series.dtype - dtypes = import_dtypes_module(self._arrow_series._version) + s = self._compliant_series._native_series + dtype = self._compliant_series.dtype + dtypes = import_dtypes_module(self._compliant_series._version) if dtype == dtypes.Datetime: unit = dtype.time_unit # type: ignore[attr-defined] s_cast = s.cast(pa.int64()) @@ -1155,93 +1155,93 @@ def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> ArrowS else: msg = "Input should be either of Date or Datetime type" raise TypeError(msg) - return self._arrow_series._from_native_series(result) + return self._compliant_series._from_native_series(result) def date(self: Self) -> ArrowSeries: import pyarrow as pa - return self._arrow_series._from_native_series( - self._arrow_series._native_series.cast(pa.date32()) + return self._compliant_series._from_native_series( + self._compliant_series._native_series.cast(pa.date32()) ) def year(self: Self) -> ArrowSeries: import pyarrow.compute as pc - return self._arrow_series._from_native_series( - pc.year(self._arrow_series._native_series) + return self._compliant_series._from_native_series( + pc.year(self._compliant_series._native_series) ) def month(self: Self) -> ArrowSeries: import pyarrow.compute as pc - return self._arrow_series._from_native_series( - pc.month(self._arrow_series._native_series) + return self._compliant_series._from_native_series( + pc.month(self._compliant_series._native_series) ) def day(self: Self) -> ArrowSeries: import pyarrow.compute as pc - return self._arrow_series._from_native_series( - pc.day(self._arrow_series._native_series) + return self._compliant_series._from_native_series( + pc.day(self._compliant_series._native_series) ) def hour(self: Self) -> ArrowSeries: import pyarrow.compute as pc - return self._arrow_series._from_native_series( - pc.hour(self._arrow_series._native_series) + return self._compliant_series._from_native_series( + pc.hour(self._compliant_series._native_series) ) def minute(self: Self) -> ArrowSeries: import pyarrow.compute as pc - return self._arrow_series._from_native_series( - pc.minute(self._arrow_series._native_series) + return self._compliant_series._from_native_series( + pc.minute(self._compliant_series._native_series) ) def second(self: Self) -> ArrowSeries: import pyarrow.compute as pc - return self._arrow_series._from_native_series( - pc.second(self._arrow_series._native_series) + return self._compliant_series._from_native_series( + pc.second(self._compliant_series._native_series) ) def millisecond(self: Self) -> ArrowSeries: import pyarrow.compute as pc - return self._arrow_series._from_native_series( - pc.millisecond(self._arrow_series._native_series) + return self._compliant_series._from_native_series( + pc.millisecond(self._compliant_series._native_series) ) def microsecond(self: Self) -> ArrowSeries: import pyarrow.compute as pc - arr = self._arrow_series._native_series + arr = self._compliant_series._native_series result = pc.add(pc.multiply(pc.millisecond(arr), 1000), pc.microsecond(arr)) - return self._arrow_series._from_native_series(result) + return self._compliant_series._from_native_series(result) def nanosecond(self: Self) -> ArrowSeries: import pyarrow.compute as pc - arr = self._arrow_series._native_series + arr = self._compliant_series._native_series result = pc.add( pc.multiply(self.microsecond()._native_series, 1000), pc.nanosecond(arr) ) - return self._arrow_series._from_native_series(result) + return self._compliant_series._from_native_series(result) def ordinal_day(self: Self) -> ArrowSeries: import pyarrow.compute as pc - return self._arrow_series._from_native_series( - pc.day_of_year(self._arrow_series._native_series) + return self._compliant_series._from_native_series( + pc.day_of_year(self._compliant_series._native_series) ) def total_minutes(self: Self) -> ArrowSeries: import pyarrow as pa import pyarrow.compute as pc - arr = self._arrow_series._native_series + arr = self._compliant_series._native_series unit = arr.type.unit unit_to_minutes_factor = { @@ -1252,7 +1252,7 @@ def total_minutes(self: Self) -> ArrowSeries: } factor = pa.scalar(unit_to_minutes_factor[unit], type=pa.int64()) - return self._arrow_series._from_native_series( + return self._compliant_series._from_native_series( pc.cast(pc.divide(arr, factor), pa.int64()) ) @@ -1260,7 +1260,7 @@ def total_seconds(self: Self) -> ArrowSeries: import pyarrow as pa import pyarrow.compute as pc - arr = self._arrow_series._native_series + arr = self._compliant_series._native_series unit = arr.type.unit unit_to_seconds_factor = { @@ -1271,7 +1271,7 @@ def total_seconds(self: Self) -> ArrowSeries: } factor = pa.scalar(unit_to_seconds_factor[unit], type=pa.int64()) - return self._arrow_series._from_native_series( + return self._compliant_series._from_native_series( pc.cast(pc.divide(arr, factor), pa.int64()) ) @@ -1279,7 +1279,7 @@ def total_milliseconds(self: Self) -> ArrowSeries: import pyarrow as pa import pyarrow.compute as pc - arr = self._arrow_series._native_series + arr = self._compliant_series._native_series unit = arr.type.unit unit_to_milli_factor = { @@ -1292,11 +1292,11 @@ def total_milliseconds(self: Self) -> ArrowSeries: factor = pa.scalar(unit_to_milli_factor[unit], type=pa.int64()) if unit == "s": - return self._arrow_series._from_native_series( + return self._compliant_series._from_native_series( pc.cast(pc.multiply(arr, factor), pa.int64()) ) - return self._arrow_series._from_native_series( + return self._compliant_series._from_native_series( pc.cast(pc.divide(arr, factor), pa.int64()) ) @@ -1304,7 +1304,7 @@ def total_microseconds(self: Self) -> ArrowSeries: import pyarrow as pa import pyarrow.compute as pc - arr = self._arrow_series._native_series + arr = self._compliant_series._native_series unit = arr.type.unit unit_to_micro_factor = { @@ -1317,10 +1317,10 @@ def total_microseconds(self: Self) -> ArrowSeries: factor = pa.scalar(unit_to_micro_factor[unit], type=pa.int64()) if unit in {"s", "ms"}: - return self._arrow_series._from_native_series( + return self._compliant_series._from_native_series( pc.cast(pc.multiply(arr, factor), pa.int64()) ) - return self._arrow_series._from_native_series( + return self._compliant_series._from_native_series( pc.cast(pc.divide(arr, factor), pa.int64()) ) @@ -1328,7 +1328,7 @@ def total_nanoseconds(self: Self) -> ArrowSeries: import pyarrow as pa import pyarrow.compute as pc - arr = self._arrow_series._native_series + arr = self._compliant_series._native_series unit = arr.type.unit unit_to_nano_factor = { @@ -1340,36 +1340,36 @@ def total_nanoseconds(self: Self) -> ArrowSeries: factor = pa.scalar(unit_to_nano_factor[unit], type=pa.int64()) - return self._arrow_series._from_native_series( + return self._compliant_series._from_native_series( pc.cast(pc.multiply(arr, factor), pa.int64()) ) class ArrowSeriesCatNamespace: def __init__(self: Self, series: ArrowSeries) -> None: - self._arrow_series = series + self._compliant_series = series def get_categories(self: Self) -> ArrowSeries: import pyarrow as pa - ca = self._arrow_series._native_series + ca = self._compliant_series._native_series # TODO(Unassigned): this looks potentially expensive - is there no better way? # https://github.com/narwhals-dev/narwhals/issues/464 out = pa.chunked_array( [pa.concat_arrays([x.dictionary for x in ca.chunks]).unique()] ) - return self._arrow_series._from_native_series(out) + return self._compliant_series._from_native_series(out) class ArrowSeriesStringNamespace: def __init__(self: Self, series: ArrowSeries) -> None: - self._arrow_series = series + self._compliant_series = series def len_chars(self: Self) -> ArrowSeries: import pyarrow.compute as pc - return self._arrow_series._from_native_series( - pc.utf8_length(self._arrow_series._native_series) + return self._compliant_series._from_native_series( + pc.utf8_length(self._compliant_series._native_series) ) def replace( @@ -1378,9 +1378,9 @@ def replace( import pyarrow.compute as pc method = "replace_substring" if literal else "replace_substring_regex" - return self._arrow_series._from_native_series( + return self._compliant_series._from_native_series( getattr(pc, method)( - self._arrow_series._native_series, + self._compliant_series._native_series, pattern=pattern, replacement=value, max_replacements=n, @@ -1396,9 +1396,9 @@ def strip_chars(self: Self, characters: str | None) -> ArrowSeries: import pyarrow.compute as pc whitespace = " \t\n\r\v\f" - return self._arrow_series._from_native_series( + return self._compliant_series._from_native_series( pc.utf8_trim( - self._arrow_series._native_series, + self._compliant_series._native_series, characters or whitespace, ) ) @@ -1406,14 +1406,14 @@ def strip_chars(self: Self, characters: str | None) -> ArrowSeries: def starts_with(self: Self, prefix: str) -> ArrowSeries: import pyarrow.compute as pc - return self._arrow_series._from_native_series( + return self._compliant_series._from_native_series( pc.equal(self.slice(0, len(prefix))._native_series, prefix) ) def ends_with(self: Self, suffix: str) -> ArrowSeries: import pyarrow.compute as pc - return self._arrow_series._from_native_series( + return self._compliant_series._from_native_series( pc.equal(self.slice(-len(suffix), None)._native_series, suffix) ) @@ -1421,17 +1421,17 @@ def contains(self: Self, pattern: str, *, literal: bool) -> ArrowSeries: import pyarrow.compute as pc check_func = pc.match_substring if literal else pc.match_substring_regex - return self._arrow_series._from_native_series( - check_func(self._arrow_series._native_series, pattern) + return self._compliant_series._from_native_series( + check_func(self._compliant_series._native_series, pattern) ) def slice(self: Self, offset: int, length: int | None) -> ArrowSeries: import pyarrow.compute as pc stop = offset + length if length is not None else None - return self._arrow_series._from_native_series( + return self._compliant_series._from_native_series( pc.utf8_slice_codeunits( - self._arrow_series._native_series, start=offset, stop=stop + self._compliant_series._native_series, start=offset, stop=stop ), ) @@ -1439,22 +1439,22 @@ def to_datetime(self: Self, format: str | None) -> ArrowSeries: # noqa: A002 import pyarrow.compute as pc if format is None: - format = parse_datetime_format(self._arrow_series._native_series) + format = parse_datetime_format(self._compliant_series._native_series) - return self._arrow_series._from_native_series( - pc.strptime(self._arrow_series._native_series, format=format, unit="us") + return self._compliant_series._from_native_series( + pc.strptime(self._compliant_series._native_series, format=format, unit="us") ) def to_uppercase(self: Self) -> ArrowSeries: import pyarrow.compute as pc - return self._arrow_series._from_native_series( - pc.utf8_upper(self._arrow_series._native_series), + return self._compliant_series._from_native_series( + pc.utf8_upper(self._compliant_series._native_series), ) def to_lowercase(self: Self) -> ArrowSeries: import pyarrow.compute as pc - return self._arrow_series._from_native_series( - pc.utf8_lower(self._arrow_series._native_series), + return self._compliant_series._from_native_series( + pc.utf8_lower(self._compliant_series._native_series), ) diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index 76b9cd4314..8990f0a8ff 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -919,10 +919,10 @@ def func( class DaskExprStringNamespace: def __init__(self, expr: DaskExpr) -> None: - self._expr = expr + self._compliant_expr = expr def len_chars(self) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input: _input.str.len(), "len", returns_scalar=False ) @@ -934,7 +934,7 @@ def replace( literal: bool = False, n: int = 1, ) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input, _pattern, _value, _literal, _n: _input.str.replace( _pattern, _value, regex=not _literal, n=_n ), @@ -953,7 +953,7 @@ def replace_all( *, literal: bool = False, ) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input, _pattern, _value, _literal: _input.str.replace( _pattern, _value, n=-1, regex=not _literal ), @@ -965,7 +965,7 @@ def replace_all( ) def strip_chars(self, characters: str | None = None) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input, characters: _input.str.strip(characters), "strip", characters, @@ -973,7 +973,7 @@ def strip_chars(self, characters: str | None = None) -> DaskExpr: ) def starts_with(self, prefix: str) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input, prefix: _input.str.startswith(prefix), "starts_with", prefix, @@ -981,7 +981,7 @@ def starts_with(self, prefix: str) -> DaskExpr: ) def ends_with(self, suffix: str) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input, suffix: _input.str.endswith(suffix), "ends_with", suffix, @@ -989,7 +989,7 @@ def ends_with(self, suffix: str) -> DaskExpr: ) def contains(self, pattern: str, *, literal: bool = False) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input, pat, regex: _input.str.contains(pat=pat, regex=regex), "contains", pattern, @@ -999,7 +999,7 @@ def contains(self, pattern: str, *, literal: bool = False) -> DaskExpr: def slice(self, offset: int, length: int | None = None) -> DaskExpr: stop = offset + length if length else None - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input, start, stop: _input.str.slice(start=start, stop=stop), "slice", offset, @@ -1010,7 +1010,7 @@ def slice(self, offset: int, length: int | None = None) -> DaskExpr: def to_datetime(self: Self, format: str | None) -> DaskExpr: # noqa: A002 import dask.dataframe as dd - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input, fmt: dd.to_datetime(_input, format=fmt), "to_datetime", format, @@ -1018,14 +1018,14 @@ def to_datetime(self: Self, format: str | None) -> DaskExpr: # noqa: A002 ) def to_uppercase(self) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input: _input.str.upper(), "to_uppercase", returns_scalar=False, ) def to_lowercase(self) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input: _input.str.lower(), "to_lowercase", returns_scalar=False, @@ -1034,87 +1034,87 @@ def to_lowercase(self) -> DaskExpr: class DaskExprDateTimeNamespace: def __init__(self, expr: DaskExpr) -> None: - self._expr = expr + self._compliant_expr = expr def date(self) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input: _input.dt.date, "date", returns_scalar=False, ) def year(self) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input: _input.dt.year, "year", returns_scalar=False, ) def month(self) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input: _input.dt.month, "month", returns_scalar=False, ) def day(self) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input: _input.dt.day, "day", returns_scalar=False, ) def hour(self) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input: _input.dt.hour, "hour", returns_scalar=False, ) def minute(self) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input: _input.dt.minute, "minute", returns_scalar=False, ) def second(self) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input: _input.dt.second, "second", returns_scalar=False, ) def millisecond(self) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input: _input.dt.microsecond // 1000, "millisecond", returns_scalar=False, ) def microsecond(self) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input: _input.dt.microsecond, "microsecond", returns_scalar=False, ) def nanosecond(self) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input: _input.dt.microsecond * 1000 + _input.dt.nanosecond, "nanosecond", returns_scalar=False, ) def ordinal_day(self) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input: _input.dt.dayofyear, "ordinal_day", returns_scalar=False, ) def to_string(self, format: str) -> DaskExpr: # noqa: A002 - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input, _format: _input.dt.strftime(_format), "strftime", format.replace("%.f", ".%f"), @@ -1122,7 +1122,7 @@ def to_string(self, format: str) -> DaskExpr: # noqa: A002 ) def replace_time_zone(self, time_zone: str | None) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input, _time_zone: _input.dt.tz_localize(None).dt.tz_localize( _time_zone ) @@ -1135,13 +1135,15 @@ def replace_time_zone(self, time_zone: str | None) -> DaskExpr: def convert_time_zone(self, time_zone: str) -> DaskExpr: def func(s: dask_expr.Series, time_zone: str) -> dask_expr.Series: - dtype = native_to_narwhals_dtype(s, self._expr._version, Implementation.DASK) + dtype = native_to_narwhals_dtype( + s, self._compliant_expr._version, Implementation.DASK + ) if dtype.time_zone is None: # type: ignore[attr-defined] return s.dt.tz_localize("UTC").dt.tz_convert(time_zone) else: return s.dt.tz_convert(time_zone) - return self._expr._from_call( + return self._compliant_expr._from_call( func, "tz_convert", time_zone, @@ -1152,10 +1154,12 @@ def timestamp(self, time_unit: Literal["ns", "us", "ms"] = "us") -> DaskExpr: def func( s: dask_expr.Series, time_unit: Literal["ns", "us", "ms"] = "us" ) -> dask_expr.Series: - dtype = native_to_narwhals_dtype(s, self._expr._version, Implementation.DASK) + dtype = native_to_narwhals_dtype( + s, self._compliant_expr._version, Implementation.DASK + ) is_pyarrow_dtype = "pyarrow" in str(dtype) mask_na = s.isna() - dtypes = import_dtypes_module(self._expr._version) + dtypes = import_dtypes_module(self._compliant_expr._version) if dtype == dtypes.Date: # Date is only supported in pandas dtypes if pyarrow-backed s_cast = s.astype("Int32[pyarrow]") @@ -1173,7 +1177,7 @@ def func( raise TypeError(msg) return result.where(~mask_na) - return self._expr._from_call( + return self._compliant_expr._from_call( func, "datetime", time_unit, @@ -1181,35 +1185,35 @@ def func( ) def total_minutes(self) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input: _input.dt.total_seconds() // 60, "total_minutes", returns_scalar=False, ) def total_seconds(self) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input: _input.dt.total_seconds() // 1, "total_seconds", returns_scalar=False, ) def total_milliseconds(self) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input: _input.dt.total_seconds() * 1000 // 1, "total_milliseconds", returns_scalar=False, ) def total_microseconds(self) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input: _input.dt.total_seconds() * 1_000_000 // 1, "total_microseconds", returns_scalar=False, ) def total_nanoseconds(self) -> DaskExpr: - return self._expr._from_call( + return self._compliant_expr._from_call( lambda _input: _input.dt.total_seconds() * 1_000_000_000 // 1, "total_nanoseconds", returns_scalar=False, @@ -1218,10 +1222,10 @@ def total_nanoseconds(self) -> DaskExpr: class DaskExprNameNamespace: def __init__(self: Self, expr: DaskExpr) -> None: - self._expr = expr + self._compliant_expr = expr def keep(self: Self) -> DaskExpr: - root_names = self._expr._root_names + root_names = self._compliant_expr._root_names if root_names is None: msg = ( @@ -1231,22 +1235,22 @@ def keep(self: Self) -> DaskExpr: ) raise ValueError(msg) - return self._expr.__class__( + return self._compliant_expr.__class__( lambda df: [ series.rename(name) - for series, name in zip(self._expr._call(df), root_names) + for series, name in zip(self._compliant_expr._call(df), root_names) ], - depth=self._expr._depth, - function_name=self._expr._function_name, + depth=self._compliant_expr._depth, + function_name=self._compliant_expr._function_name, root_names=root_names, output_names=root_names, - returns_scalar=self._expr._returns_scalar, - backend_version=self._expr._backend_version, - version=self._expr._version, + returns_scalar=self._compliant_expr._returns_scalar, + backend_version=self._compliant_expr._backend_version, + version=self._compliant_expr._version, ) def map(self: Self, function: Callable[[str], str]) -> DaskExpr: - root_names = self._expr._root_names + root_names = self._compliant_expr._root_names if root_names is None: msg = ( @@ -1258,22 +1262,22 @@ def map(self: Self, function: Callable[[str], str]) -> DaskExpr: output_names = [function(str(name)) for name in root_names] - return self._expr.__class__( + return self._compliant_expr.__class__( lambda df: [ series.rename(name) - for series, name in zip(self._expr._call(df), output_names) + for series, name in zip(self._compliant_expr._call(df), output_names) ], - depth=self._expr._depth, - function_name=self._expr._function_name, + depth=self._compliant_expr._depth, + function_name=self._compliant_expr._function_name, root_names=root_names, output_names=output_names, - returns_scalar=self._expr._returns_scalar, - backend_version=self._expr._backend_version, - version=self._expr._version, + returns_scalar=self._compliant_expr._returns_scalar, + backend_version=self._compliant_expr._backend_version, + version=self._compliant_expr._version, ) def prefix(self: Self, prefix: str) -> DaskExpr: - root_names = self._expr._root_names + root_names = self._compliant_expr._root_names if root_names is None: msg = ( "Anonymous expressions are not supported in `.name.prefix`.\n" @@ -1283,22 +1287,22 @@ def prefix(self: Self, prefix: str) -> DaskExpr: raise ValueError(msg) output_names = [prefix + str(name) for name in root_names] - return self._expr.__class__( + return self._compliant_expr.__class__( lambda df: [ series.rename(name) - for series, name in zip(self._expr._call(df), output_names) + for series, name in zip(self._compliant_expr._call(df), output_names) ], - depth=self._expr._depth, - function_name=self._expr._function_name, + depth=self._compliant_expr._depth, + function_name=self._compliant_expr._function_name, root_names=root_names, output_names=output_names, - returns_scalar=self._expr._returns_scalar, - backend_version=self._expr._backend_version, - version=self._expr._version, + returns_scalar=self._compliant_expr._returns_scalar, + backend_version=self._compliant_expr._backend_version, + version=self._compliant_expr._version, ) def suffix(self: Self, suffix: str) -> DaskExpr: - root_names = self._expr._root_names + root_names = self._compliant_expr._root_names if root_names is None: msg = ( "Anonymous expressions are not supported in `.name.suffix`.\n" @@ -1309,22 +1313,22 @@ def suffix(self: Self, suffix: str) -> DaskExpr: output_names = [str(name) + suffix for name in root_names] - return self._expr.__class__( + return self._compliant_expr.__class__( lambda df: [ series.rename(name) - for series, name in zip(self._expr._call(df), output_names) + for series, name in zip(self._compliant_expr._call(df), output_names) ], - depth=self._expr._depth, - function_name=self._expr._function_name, + depth=self._compliant_expr._depth, + function_name=self._compliant_expr._function_name, root_names=root_names, output_names=output_names, - returns_scalar=self._expr._returns_scalar, - backend_version=self._expr._backend_version, - version=self._expr._version, + returns_scalar=self._compliant_expr._returns_scalar, + backend_version=self._compliant_expr._backend_version, + version=self._compliant_expr._version, ) def to_lowercase(self: Self) -> DaskExpr: - root_names = self._expr._root_names + root_names = self._compliant_expr._root_names if root_names is None: msg = ( @@ -1335,22 +1339,22 @@ def to_lowercase(self: Self) -> DaskExpr: raise ValueError(msg) output_names = [str(name).lower() for name in root_names] - return self._expr.__class__( + return self._compliant_expr.__class__( lambda df: [ series.rename(name) - for series, name in zip(self._expr._call(df), output_names) + for series, name in zip(self._compliant_expr._call(df), output_names) ], - depth=self._expr._depth, - function_name=self._expr._function_name, + depth=self._compliant_expr._depth, + function_name=self._compliant_expr._function_name, root_names=root_names, output_names=output_names, - returns_scalar=self._expr._returns_scalar, - backend_version=self._expr._backend_version, - version=self._expr._version, + returns_scalar=self._compliant_expr._returns_scalar, + backend_version=self._compliant_expr._backend_version, + version=self._compliant_expr._version, ) def to_uppercase(self: Self) -> DaskExpr: - root_names = self._expr._root_names + root_names = self._compliant_expr._root_names if root_names is None: msg = ( @@ -1361,16 +1365,16 @@ def to_uppercase(self: Self) -> DaskExpr: raise ValueError(msg) output_names = [str(name).upper() for name in root_names] - return self._expr.__class__( + return self._compliant_expr.__class__( lambda df: [ series.rename(name) - for series, name in zip(self._expr._call(df), output_names) + for series, name in zip(self._compliant_expr._call(df), output_names) ], - depth=self._expr._depth, - function_name=self._expr._function_name, + depth=self._compliant_expr._depth, + function_name=self._compliant_expr._function_name, root_names=root_names, output_names=output_names, - returns_scalar=self._expr._returns_scalar, - backend_version=self._expr._backend_version, - version=self._expr._version, + returns_scalar=self._compliant_expr._returns_scalar, + backend_version=self._compliant_expr._backend_version, + version=self._compliant_expr._version, ) diff --git a/narwhals/_polars/expr.py b/narwhals/_polars/expr.py index 559ca46b3a..1b2b9d86e0 100644 --- a/narwhals/_polars/expr.py +++ b/narwhals/_polars/expr.py @@ -184,13 +184,13 @@ def name(self: Self) -> PolarsExprNameNamespace: class PolarsExprDateTimeNamespace: def __init__(self: Self, expr: PolarsExpr) -> None: - self._expr = expr + self._compliant_expr = expr def __getattr__(self: Self, attr: str) -> Callable[[Any], PolarsExpr]: def func(*args: Any, **kwargs: Any) -> PolarsExpr: args, kwargs = extract_args_kwargs(args, kwargs) # type: ignore[assignment] - return self._expr._from_native_expr( - getattr(self._expr._native_expr.dt, attr)(*args, **kwargs) + return self._compliant_expr._from_native_expr( + getattr(self._compliant_expr._native_expr.dt, attr)(*args, **kwargs) ) return func @@ -198,13 +198,13 @@ def func(*args: Any, **kwargs: Any) -> PolarsExpr: class PolarsExprStringNamespace: def __init__(self: Self, expr: PolarsExpr) -> None: - self._expr = expr + self._compliant_expr = expr def __getattr__(self: Self, attr: str) -> Callable[[Any], PolarsExpr]: def func(*args: Any, **kwargs: Any) -> PolarsExpr: args, kwargs = extract_args_kwargs(args, kwargs) # type: ignore[assignment] - return self._expr._from_native_expr( - getattr(self._expr._native_expr.str, attr)(*args, **kwargs) + return self._compliant_expr._from_native_expr( + getattr(self._compliant_expr._native_expr.str, attr)(*args, **kwargs) ) return func @@ -212,13 +212,13 @@ def func(*args: Any, **kwargs: Any) -> PolarsExpr: class PolarsExprCatNamespace: def __init__(self: Self, expr: PolarsExpr) -> None: - self._expr = expr + self._compliant_expr = expr def __getattr__(self: Self, attr: str) -> Callable[[Any], PolarsExpr]: def func(*args: Any, **kwargs: Any) -> PolarsExpr: args, kwargs = extract_args_kwargs(args, kwargs) # type: ignore[assignment] - return self._expr._from_native_expr( - getattr(self._expr._native_expr.cat, attr)(*args, **kwargs) + return self._compliant_expr._from_native_expr( + getattr(self._compliant_expr._native_expr.cat, attr)(*args, **kwargs) ) return func @@ -226,13 +226,13 @@ def func(*args: Any, **kwargs: Any) -> PolarsExpr: class PolarsExprNameNamespace: def __init__(self: Self, expr: PolarsExpr) -> None: - self._expr = expr + self._compliant_expr = expr def __getattr__(self: Self, attr: str) -> Callable[[Any], PolarsExpr]: def func(*args: Any, **kwargs: Any) -> PolarsExpr: args, kwargs = extract_args_kwargs(args, kwargs) # type: ignore[assignment] - return self._expr._from_native_expr( - getattr(self._expr._native_expr.name, attr)(*args, **kwargs) + return self._compliant_expr._from_native_expr( + getattr(self._compliant_expr._native_expr.name, attr)(*args, **kwargs) ) return func diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index c960e8441a..c4443d3560 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -369,13 +369,13 @@ def cat(self: Self) -> PolarsSeriesCatNamespace: class PolarsSeriesDateTimeNamespace: def __init__(self: Self, series: PolarsSeries) -> None: - self._series = series + self._compliant_series = series def __getattr__(self: Self, attr: str) -> Any: def func(*args: Any, **kwargs: Any) -> Any: args, kwargs = extract_args_kwargs(args, kwargs) # type: ignore[assignment] - return self._series._from_native_series( - getattr(self._series._native_series.dt, attr)(*args, **kwargs) + return self._compliant_series._from_native_series( + getattr(self._compliant_series._native_series.dt, attr)(*args, **kwargs) ) return func @@ -383,13 +383,13 @@ def func(*args: Any, **kwargs: Any) -> Any: class PolarsSeriesStringNamespace: def __init__(self: Self, series: PolarsSeries) -> None: - self._series = series + self._compliant_series = series def __getattr__(self: Self, attr: str) -> Any: def func(*args: Any, **kwargs: Any) -> Any: args, kwargs = extract_args_kwargs(args, kwargs) # type: ignore[assignment] - return self._series._from_native_series( - getattr(self._series._native_series.str, attr)(*args, **kwargs) + return self._compliant_series._from_native_series( + getattr(self._compliant_series._native_series.str, attr)(*args, **kwargs) ) return func @@ -397,13 +397,13 @@ def func(*args: Any, **kwargs: Any) -> Any: class PolarsSeriesCatNamespace: def __init__(self: Self, series: PolarsSeries) -> None: - self._series = series + self._compliant_series = series def __getattr__(self: Self, attr: str) -> Any: def func(*args: Any, **kwargs: Any) -> Any: args, kwargs = extract_args_kwargs(args, kwargs) # type: ignore[assignment] - return self._series._from_native_series( - getattr(self._series._native_series.cat, attr)(*args, **kwargs) + return self._compliant_series._from_native_series( + getattr(self._compliant_series._native_series.cat, attr)(*args, **kwargs) ) return func