From 47e768ddfc36ffa6949cd0f979164cdd60227318 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 5 Mar 2025 18:26:23 +0000 Subject: [PATCH 01/14] chore(typing): Add typing for `SparkLikeExpr` properties Porting over (#2051), didn't realize this was declared twice until (#2132) --- narwhals/_spark_like/expr.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/narwhals/_spark_like/expr.py b/narwhals/_spark_like/expr.py index 7c48731443..965627ff82 100644 --- a/narwhals/_spark_like/expr.py +++ b/narwhals/_spark_like/expr.py @@ -24,6 +24,7 @@ if TYPE_CHECKING: from pyspark.sql import Column + from pyspark.sql import Window from typing_extensions import Self from narwhals._spark_like.dataframe import SparkLikeLazyFrame @@ -78,7 +79,11 @@ def func(df: SparkLikeLazyFrame) -> Sequence[Column]: ) @property - def _F(self: Self) -> Any: # noqa: N802 + def _F(self: Self): # type: ignore[no-untyped-def] # noqa: ANN202, N802 + if TYPE_CHECKING: + from pyspark.sql import functions + + return functions if self._implementation is Implementation.SQLFRAME: from sqlframe.base.session import _BaseSession @@ -91,7 +96,11 @@ def _F(self: Self) -> Any: # noqa: N802 return functions @property - def _native_dtypes(self: Self) -> Any: + def _native_dtypes(self: Self): # type: ignore[no-untyped-def] # noqa: ANN202 + if TYPE_CHECKING: + from pyspark.sql import types + + return types if self._implementation is Implementation.SQLFRAME: from sqlframe.base.session import _BaseSession @@ -104,7 +113,7 @@ def _native_dtypes(self: Self) -> Any: return types @property - def _Window(self: Self) -> Any: # noqa: N802 + def _Window(self: Self) -> type[Window]: # noqa: N802 if self._implementation is Implementation.SQLFRAME: from sqlframe.base.session import _BaseSession From 7bb913182bd414dda39f841de39105db78767090 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 
5 Mar 2025 19:08:32 +0000 Subject: [PATCH 02/14] chore: fix typing and simplify `SparkLikeExprStringNamespace.to_datetime` Resolves (https://github.com/narwhals-dev/narwhals/actions/runs/13682912007/job/38259412675?pr=2152) --- narwhals/_spark_like/expr_str.py | 48 ++++++++++---------------------- 1 file changed, 15 insertions(+), 33 deletions(-) diff --git a/narwhals/_spark_like/expr_str.py b/narwhals/_spark_like/expr_str.py index c84eefecd1..89d670256a 100644 --- a/narwhals/_spark_like/expr_str.py +++ b/narwhals/_spark_like/expr_str.py @@ -1,7 +1,8 @@ from __future__ import annotations +from functools import partial from typing import TYPE_CHECKING -from typing import overload +from typing import Any if TYPE_CHECKING: from pyspark.sql import Column @@ -107,47 +108,28 @@ def to_lowercase(self: Self) -> SparkLikeExpr: ) def to_datetime(self: Self, format: str | None) -> SparkLikeExpr: # noqa: A002 - is_naive = ( - format is not None - and "%s" not in format - and "%z" not in format - and "Z" not in format - ) - function = ( - self._compliant_expr._F.to_timestamp_ntz - if is_naive - else self._compliant_expr._F.to_timestamp - ) - pyspark_format = strptime_to_pyspark_format(format) - format = ( - self._compliant_expr._F.lit(pyspark_format) if is_naive else pyspark_format - ) + F = self._compliant_expr._F # noqa: N806 + if not format: + function = F.to_timestamp + elif is_naive(format): + function = partial( + F.to_timestamp_ntz, format=F.lit(strptime_to_pyspark_format(format)) + ) + else: + function = partial(F.to_timestamp, format=strptime_to_pyspark_format(format)) return self._compliant_expr._from_call( - lambda _input: function( - self._compliant_expr._F.replace( - _input, - self._compliant_expr._F.lit("T"), - self._compliant_expr._F.lit(" "), - ), - format=format, - ), + lambda _input: function(F.replace(_input, F.lit("T"), F.lit(" "))), "to_datetime", ) -@overload -def strptime_to_pyspark_format(format: None) -> None: ... 
- - -@overload -def strptime_to_pyspark_format(format: str) -> str: ... +def is_naive(obj: Any) -> bool: + return obj is not None and {"%s", "%z", "Z"}.isdisjoint(obj) -def strptime_to_pyspark_format(format: str | None) -> str | None: # noqa: A002 +def strptime_to_pyspark_format(format: str) -> str: # noqa: A002 """Converts a Python strptime datetime format string to a PySpark datetime format string.""" # Mapping from Python strptime format to PySpark format - if format is None: - return None # see https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html # and https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior From 89eeed57f8d21ebdc6b442cbad92fa40debf50a5 Mon Sep 17 00:00:00 2001 From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com> Date: Thu, 6 Mar 2025 08:05:03 +0100 Subject: [PATCH 03/14] rename function --- narwhals/_spark_like/expr_str.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/narwhals/_spark_like/expr_str.py b/narwhals/_spark_like/expr_str.py index 89d670256a..e888d52536 100644 --- a/narwhals/_spark_like/expr_str.py +++ b/narwhals/_spark_like/expr_str.py @@ -2,7 +2,6 @@ from functools import partial from typing import TYPE_CHECKING -from typing import Any if TYPE_CHECKING: from pyspark.sql import Column @@ -111,7 +110,7 @@ def to_datetime(self: Self, format: str | None) -> SparkLikeExpr: # noqa: A002 F = self._compliant_expr._F # noqa: N806 if not format: function = F.to_timestamp - elif is_naive(format): + elif is_naive_format(format): function = partial( F.to_timestamp_ntz, format=F.lit(strptime_to_pyspark_format(format)) ) @@ -123,8 +122,8 @@ def to_datetime(self: Self, format: str | None) -> SparkLikeExpr: # noqa: A002 ) -def is_naive(obj: Any) -> bool: - return obj is not None and {"%s", "%z", "Z"}.isdisjoint(obj) +def is_naive_format(format_str: str) -> bool: + return {"%s", "%z", "Z"}.isdisjoint(format_str) def strptime_to_pyspark_format(format: str) -> str: # noqa: A002 From 
f730d52e9908d2610a92537ed741cba116482444 Mon Sep 17 00:00:00 2001 From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com> Date: Thu, 6 Mar 2025 08:23:18 +0100 Subject: [PATCH 04/14] use single chars in set --- narwhals/_spark_like/expr_str.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_spark_like/expr_str.py b/narwhals/_spark_like/expr_str.py index e888d52536..954a4cf13a 100644 --- a/narwhals/_spark_like/expr_str.py +++ b/narwhals/_spark_like/expr_str.py @@ -123,7 +123,7 @@ def to_datetime(self: Self, format: str | None) -> SparkLikeExpr: # noqa: A002 def is_naive_format(format_str: str) -> bool: - return {"%s", "%z", "Z"}.isdisjoint(format_str) + return {"s", "z", "Z"}.isdisjoint(format_str) def strptime_to_pyspark_format(format: str) -> str: # noqa: A002 From e19dcdf5f747a350c69b2a06236bbdcd886d6b27 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 6 Mar 2025 13:23:39 +0000 Subject: [PATCH 05/14] fix: Remove timezone offset replacement --- narwhals/_spark_like/expr_str.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/narwhals/_spark_like/expr_str.py b/narwhals/_spark_like/expr_str.py index 954a4cf13a..05a37de022 100644 --- a/narwhals/_spark_like/expr_str.py +++ b/narwhals/_spark_like/expr_str.py @@ -146,7 +146,8 @@ def strptime_to_pyspark_format(format: str) -> str: # noqa: A002 "%a": "E", # Abbreviated weekday name "%A": "E", # Full weekday name "%j": "D", # Day of the year - "%z": "Z", # Timezone offset + # NOTE: This replacement seems to be happening upstream? 
+ # "%z": "Z", # Timezone offset "%s": "X", # Unix timestamp } From a4b619c1ff9ef357248b8d8c785134bd0338044e Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 6 Mar 2025 13:24:08 +0000 Subject: [PATCH 06/14] test: Adds `test_to_datetime_tz_aware` Resolves https://github.com/narwhals-dev/narwhals/pull/2152#discussion_r1983141249 --- tests/expr_and_series/str/to_datetime_test.py | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tests/expr_and_series/str/to_datetime_test.py b/tests/expr_and_series/str/to_datetime_test.py index 24687af9ec..b5f14ea9fc 100644 --- a/tests/expr_and_series/str/to_datetime_test.py +++ b/tests/expr_and_series/str/to_datetime_test.py @@ -3,6 +3,7 @@ from datetime import datetime from datetime import timezone from typing import TYPE_CHECKING +from typing import Sequence import pyarrow as pa import pytest @@ -34,6 +35,52 @@ def test_to_datetime(constructor: Constructor) -> None: assert str(result) == expected +@pytest.mark.parametrize( + ("fmt", "data", "expected", "expected_polars_duckdb_pyspark"), + [ + ( + "%Y-%m-%d %H:%M:%S%z", + {"a": ["2020-01-01 12:34:56+02:00"]}, + "2020-01-01 12:34:56+02:00", + "2020-01-01 10:34:56+00:00", + ) + ], +) +def test_to_datetime_tz_aware( + request: pytest.FixtureRequest, + constructor: Constructor, + fmt: str, + data: dict[str, Sequence[str]], + expected: str, + expected_polars_duckdb_pyspark: str, +) -> None: + constructor_str = str(constructor) + if any( + name in constructor_str for name in ("polars", "duckdb", "pyspark", "sqlframe") + ): + expected = expected_polars_duckdb_pyspark + elif "pyarrow" in constructor_str and "pandas" not in constructor_str: + from pyarrow.lib import ArrowInvalid + + request.applymarker( + pytest.mark.xfail( + True, # noqa: FBT003 + raises=ArrowInvalid, + reason="Unclear, see https://github.com/narwhals-dev/narwhals/pull/2152#discussion_r1983225794", + ) + ) + + result = ( + 
nw.from_native(constructor(data)) + .lazy() + .select(b=nw.col("a").str.to_datetime(fmt)) + .collect() + .item(row=0, column="b") + ) + + assert str(result) == expected + + def test_to_datetime_series(constructor_eager: ConstructorEager) -> None: if "cudf" in str(constructor_eager): expected = "2020-01-01T12:34:56.000000000" From 236712545b6157d903584dc4ee03405e0817561d Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 6 Mar 2025 13:27:59 +0000 Subject: [PATCH 07/14] test: possibly fix `pyarrow` in ci? Maybe this was just a TZDATA issue locally? https://github.com/narwhals-dev/narwhals/actions/runs/13699734154/job/38310256617?pr=2152 --- tests/expr_and_series/str/to_datetime_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/expr_and_series/str/to_datetime_test.py b/tests/expr_and_series/str/to_datetime_test.py index b5f14ea9fc..72f57b3ba8 100644 --- a/tests/expr_and_series/str/to_datetime_test.py +++ b/tests/expr_and_series/str/to_datetime_test.py @@ -62,6 +62,7 @@ def test_to_datetime_tz_aware( elif "pyarrow" in constructor_str and "pandas" not in constructor_str: from pyarrow.lib import ArrowInvalid + expected = expected_polars_duckdb_pyspark request.applymarker( pytest.mark.xfail( True, # noqa: FBT003 From fb4865446aba9c4a26c6601f9f418266ac254ab7 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 6 Mar 2025 13:39:24 +0000 Subject: [PATCH 08/14] test: xfail polars `3.8`, fix false positive pyarrow https://github.com/narwhals-dev/narwhals/actions/runs/13699804987/job/38310487932?pr=2152 https://github.com/narwhals-dev/narwhals/actions/runs/13699804987/job/38310488783?pr=2152 --- tests/expr_and_series/str/to_datetime_test.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/expr_and_series/str/to_datetime_test.py b/tests/expr_and_series/str/to_datetime_test.py index 72f57b3ba8..42ffe706f3 100644 --- 
a/tests/expr_and_series/str/to_datetime_test.py +++ b/tests/expr_and_series/str/to_datetime_test.py @@ -1,5 +1,6 @@ from __future__ import annotations +import sys from datetime import datetime from datetime import timezone from typing import TYPE_CHECKING @@ -59,7 +60,17 @@ def test_to_datetime_tz_aware( name in constructor_str for name in ("polars", "duckdb", "pyspark", "sqlframe") ): expected = expected_polars_duckdb_pyspark - elif "pyarrow" in constructor_str and "pandas" not in constructor_str: + request.applymarker( + pytest.mark.xfail( + "polars" in constructor_str and sys.version_info < (3, 9), + reason="Needs 'polars[timezone]'", + ) + ) + elif ( + "pyarrow" in constructor_str + and "pandas" not in constructor_str + and "modin" not in constructor_str + ): from pyarrow.lib import ArrowInvalid expected = expected_polars_duckdb_pyspark From 75e48cc42ad859e5c69f64feaa3c7241d3c99dcf Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 6 Mar 2025 13:52:28 +0000 Subject: [PATCH 09/14] test: narrower xfail, tz-less expected? 
Not even sure what `pyarrow` is doing here https://github.com/narwhals-dev/narwhals/actions/runs/13700021595/job/38311197947?pr=2152 --- tests/expr_and_series/str/to_datetime_test.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/expr_and_series/str/to_datetime_test.py b/tests/expr_and_series/str/to_datetime_test.py index 42ffe706f3..c1d3927910 100644 --- a/tests/expr_and_series/str/to_datetime_test.py +++ b/tests/expr_and_series/str/to_datetime_test.py @@ -12,6 +12,7 @@ import narwhals.stable.v1 as nw from narwhals._arrow.utils import parse_datetime_format from tests.utils import assert_equal_data +from tests.utils import is_pyarrow_windows_no_tzdata if TYPE_CHECKING: from tests.utils import Constructor @@ -37,13 +38,14 @@ def test_to_datetime(constructor: Constructor) -> None: @pytest.mark.parametrize( - ("fmt", "data", "expected", "expected_polars_duckdb_pyspark"), + ("fmt", "data", "expected", "expected_polars_duckdb_pyspark", "expected_pyarrow"), [ ( "%Y-%m-%d %H:%M:%S%z", {"a": ["2020-01-01 12:34:56+02:00"]}, "2020-01-01 12:34:56+02:00", "2020-01-01 10:34:56+00:00", + "2020-01-01 10:34:56", ) ], ) @@ -54,6 +56,7 @@ def test_to_datetime_tz_aware( data: dict[str, Sequence[str]], expected: str, expected_polars_duckdb_pyspark: str, + expected_pyarrow: str, ) -> None: constructor_str = str(constructor) if any( @@ -73,12 +76,12 @@ def test_to_datetime_tz_aware( ): from pyarrow.lib import ArrowInvalid - expected = expected_polars_duckdb_pyspark + expected = expected_pyarrow request.applymarker( pytest.mark.xfail( - True, # noqa: FBT003 + is_pyarrow_windows_no_tzdata(constructor), raises=ArrowInvalid, - reason="Unclear, see https://github.com/narwhals-dev/narwhals/pull/2152#discussion_r1983225794", + reason="Timezone database unavailable", ) ) From 194ef83ca93d9c91c9efd3626251643e42f458bd Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 6 Mar 2025 13:57:48 +0000 Subject: 
[PATCH 10/14] test: account for `pyarrow` version changes https://github.com/narwhals-dev/narwhals/actions/runs/13700267075/job/38312036397?pr=215 --- tests/expr_and_series/str/to_datetime_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/expr_and_series/str/to_datetime_test.py b/tests/expr_and_series/str/to_datetime_test.py index c1d3927910..2a86b3334a 100644 --- a/tests/expr_and_series/str/to_datetime_test.py +++ b/tests/expr_and_series/str/to_datetime_test.py @@ -93,7 +93,7 @@ def test_to_datetime_tz_aware( .item(row=0, column="b") ) - assert str(result) == expected + assert str(result).startswith(expected) def test_to_datetime_series(constructor_eager: ConstructorEager) -> None: From ce3145db05be055d3387260351d12538b5b3dc49 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 6 Mar 2025 14:24:24 +0000 Subject: [PATCH 11/14] test: maybe fix `pyspark` https://github.com/narwhals-dev/narwhals/actions/runs/13700361438/job/38312364899?pr=2152 --- narwhals/_spark_like/expr_str.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/narwhals/_spark_like/expr_str.py b/narwhals/_spark_like/expr_str.py index 05a37de022..2e635c93ca 100644 --- a/narwhals/_spark_like/expr_str.py +++ b/narwhals/_spark_like/expr_str.py @@ -115,7 +115,9 @@ def to_datetime(self: Self, format: str | None) -> SparkLikeExpr: # noqa: A002 F.to_timestamp_ntz, format=F.lit(strptime_to_pyspark_format(format)) ) else: - function = partial(F.to_timestamp, format=strptime_to_pyspark_format(format)) + is_pyspark = self._compliant_expr._implementation.is_pyspark() + format = strptime_to_pyspark_format(format, is_pyspark=is_pyspark) + function = partial(F.to_timestamp, format=format) return self._compliant_expr._from_call( lambda _input: function(F.replace(_input, F.lit("T"), F.lit(" "))), "to_datetime", @@ -126,7 +128,7 @@ def is_naive_format(format_str: str) -> bool: return {"s", "z", 
"Z"}.isdisjoint(format_str) -def strptime_to_pyspark_format(format: str) -> str: # noqa: A002 +def strptime_to_pyspark_format(format: str, *, is_pyspark: bool = False) -> str: # noqa: A002 """Converts a Python strptime datetime format string to a PySpark datetime format string.""" # Mapping from Python strptime format to PySpark format @@ -146,10 +148,11 @@ def strptime_to_pyspark_format(format: str) -> str: # noqa: A002 "%a": "E", # Abbreviated weekday name "%A": "E", # Full weekday name "%j": "D", # Day of the year - # NOTE: This replacement seems to be happening upstream? - # "%z": "Z", # Timezone offset "%s": "X", # Unix timestamp } + if is_pyspark: + # NOTE: This replacement seems to be happening upstream for `sqlframe` + format_mapping["%z"] = "Z" # Timezone offset # Replace Python format specifiers with PySpark specifiers pyspark_format = format From 4470a99d18d244962559c210fbcf8f26ca34504d Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 7 Mar 2025 15:42:24 +0000 Subject: [PATCH 12/14] revert: go back to typing fixes only Addresses https://github.com/narwhals-dev/narwhals/pull/2152#pullrequestreview-2667604929 --- narwhals/_spark_like/expr_str.py | 9 +-- tests/expr_and_series/str/to_datetime_test.py | 62 ------------------- 2 files changed, 3 insertions(+), 68 deletions(-) diff --git a/narwhals/_spark_like/expr_str.py b/narwhals/_spark_like/expr_str.py index 2e635c93ca..e52f7a0c1e 100644 --- a/narwhals/_spark_like/expr_str.py +++ b/narwhals/_spark_like/expr_str.py @@ -115,8 +115,7 @@ def to_datetime(self: Self, format: str | None) -> SparkLikeExpr: # noqa: A002 F.to_timestamp_ntz, format=F.lit(strptime_to_pyspark_format(format)) ) else: - is_pyspark = self._compliant_expr._implementation.is_pyspark() - format = strptime_to_pyspark_format(format, is_pyspark=is_pyspark) + format = strptime_to_pyspark_format(format) function = partial(F.to_timestamp, format=format) return self._compliant_expr._from_call( 
lambda _input: function(F.replace(_input, F.lit("T"), F.lit(" "))), @@ -128,7 +127,7 @@ def is_naive_format(format_str: str) -> bool: return {"s", "z", "Z"}.isdisjoint(format_str) -def strptime_to_pyspark_format(format: str, *, is_pyspark: bool = False) -> str: # noqa: A002 +def strptime_to_pyspark_format(format: str) -> str: # noqa: A002 """Converts a Python strptime datetime format string to a PySpark datetime format string.""" # Mapping from Python strptime format to PySpark format @@ -148,11 +147,9 @@ def strptime_to_pyspark_format(format: str, *, is_pyspark: bool = False) -> str: "%a": "E", # Abbreviated weekday name "%A": "E", # Full weekday name "%j": "D", # Day of the year + "%z": "Z", # Timezone offset "%s": "X", # Unix timestamp } - if is_pyspark: - # NOTE: This replacement seems to be happening upstream for `sqlframe` - format_mapping["%z"] = "Z" # Timezone offset # Replace Python format specifiers with PySpark specifiers pyspark_format = format diff --git a/tests/expr_and_series/str/to_datetime_test.py b/tests/expr_and_series/str/to_datetime_test.py index 2a86b3334a..24687af9ec 100644 --- a/tests/expr_and_series/str/to_datetime_test.py +++ b/tests/expr_and_series/str/to_datetime_test.py @@ -1,10 +1,8 @@ from __future__ import annotations -import sys from datetime import datetime from datetime import timezone from typing import TYPE_CHECKING -from typing import Sequence import pyarrow as pa import pytest @@ -12,7 +10,6 @@ import narwhals.stable.v1 as nw from narwhals._arrow.utils import parse_datetime_format from tests.utils import assert_equal_data -from tests.utils import is_pyarrow_windows_no_tzdata if TYPE_CHECKING: from tests.utils import Constructor @@ -37,65 +34,6 @@ def test_to_datetime(constructor: Constructor) -> None: assert str(result) == expected -@pytest.mark.parametrize( - ("fmt", "data", "expected", "expected_polars_duckdb_pyspark", "expected_pyarrow"), - [ - ( - "%Y-%m-%d %H:%M:%S%z", - {"a": ["2020-01-01 12:34:56+02:00"]}, - "2020-01-01 
12:34:56+02:00", - "2020-01-01 10:34:56+00:00", - "2020-01-01 10:34:56", - ) - ], -) -def test_to_datetime_tz_aware( - request: pytest.FixtureRequest, - constructor: Constructor, - fmt: str, - data: dict[str, Sequence[str]], - expected: str, - expected_polars_duckdb_pyspark: str, - expected_pyarrow: str, -) -> None: - constructor_str = str(constructor) - if any( - name in constructor_str for name in ("polars", "duckdb", "pyspark", "sqlframe") - ): - expected = expected_polars_duckdb_pyspark - request.applymarker( - pytest.mark.xfail( - "polars" in constructor_str and sys.version_info < (3, 9), - reason="Needs 'polars[timezone]'", - ) - ) - elif ( - "pyarrow" in constructor_str - and "pandas" not in constructor_str - and "modin" not in constructor_str - ): - from pyarrow.lib import ArrowInvalid - - expected = expected_pyarrow - request.applymarker( - pytest.mark.xfail( - is_pyarrow_windows_no_tzdata(constructor), - raises=ArrowInvalid, - reason="Timezone database unavailable", - ) - ) - - result = ( - nw.from_native(constructor(data)) - .lazy() - .select(b=nw.col("a").str.to_datetime(fmt)) - .collect() - .item(row=0, column="b") - ) - - assert str(result).startswith(expected) - - def test_to_datetime_series(constructor_eager: ConstructorEager) -> None: if "cudf" in str(constructor_eager): expected = "2020-01-01T12:34:56.000000000" From 77a264ce2dd27fcd037f2ba56d279b27b30bff09 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 7 Mar 2025 15:43:39 +0000 Subject: [PATCH 13/14] chore: ignore `format` shadowing https://github.com/narwhals-dev/narwhals/pull/2152#pullrequestreview-2667604929 --- narwhals/_spark_like/expr_str.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/narwhals/_spark_like/expr_str.py b/narwhals/_spark_like/expr_str.py index e52f7a0c1e..f83d844f58 100644 --- a/narwhals/_spark_like/expr_str.py +++ b/narwhals/_spark_like/expr_str.py @@ -123,8 +123,8 @@ def to_datetime(self: 
Self, format: str | None) -> SparkLikeExpr: # noqa: A002 ) -def is_naive_format(format_str: str) -> bool: - return {"s", "z", "Z"}.isdisjoint(format_str) +def is_naive_format(format: str) -> bool: # noqa: A002 + return {"s", "z", "Z"}.isdisjoint(format) def strptime_to_pyspark_format(format: str) -> str: # noqa: A002 From 311c7ea4d556e7530db45c30e6d25a2d3dfdf5d5 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 7 Mar 2025 22:41:01 +0000 Subject: [PATCH 14/14] keep logic the same I hope https://github.com/narwhals-dev/narwhals/pull/2152#discussion_r1985798112 --- narwhals/_spark_like/expr_str.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_spark_like/expr_str.py b/narwhals/_spark_like/expr_str.py index f83d844f58..d8646d4dee 100644 --- a/narwhals/_spark_like/expr_str.py +++ b/narwhals/_spark_like/expr_str.py @@ -124,7 +124,7 @@ def to_datetime(self: Self, format: str | None) -> SparkLikeExpr: # noqa: A002 def is_naive_format(format: str) -> bool: # noqa: A002 - return {"s", "z", "Z"}.isdisjoint(format) + return not any(x in format for x in ("%s", "%z", "Z")) def strptime_to_pyspark_format(format: str) -> str: # noqa: A002