diff --git a/.github/workflows/extremes.yml b/.github/workflows/extremes.yml
index 6749db244a..ae69bfe39b 100644
--- a/.github/workflows/extremes.yml
+++ b/.github/workflows/extremes.yml
@@ -25,7 +25,7 @@ jobs:
           cache-suffix: ${{ matrix.python-version }}
           cache-dependency-glob: "pyproject.toml"
       - name: install-minimum-versions
-        run: uv pip install pipdeptree tox virtualenv setuptools pandas==0.25.3 polars==0.20.3 numpy==1.17.5 pyarrow==11.0.0 "pyarrow-stubs<17" scipy==1.5.0 scikit-learn==1.1.0 duckdb==1.0 tzdata --system
+        run: uv pip install pipdeptree tox virtualenv setuptools pandas==0.25.3 polars==0.20.3 numpy==1.17.5 pyarrow==11.0.0 "pyarrow-stubs<17" scipy==1.5.0 scikit-learn==1.1.0 duckdb==1.0 tzdata backports.zoneinfo --system
       - name: install-reqs
         run: |
           uv pip install -e ".[tests]" --system
@@ -62,7 +62,7 @@ jobs:
           cache-suffix: ${{ matrix.python-version }}
           cache-dependency-glob: "pyproject.toml"
       - name: install-pretty-old-versions
-        run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.5 polars==0.20.3 numpy==1.17.5 pyarrow==11.0.0 "pyarrow-stubs<17" pyspark==3.5.0 scipy==1.5.0 scikit-learn==1.1.0 duckdb==1.0 tzdata --system
+        run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.5 polars==0.20.3 numpy==1.17.5 pyarrow==11.0.0 "pyarrow-stubs<17" pyspark==3.5.0 scipy==1.5.0 scikit-learn==1.1.0 duckdb==1.0 tzdata backports.zoneinfo --system
       - name: install-reqs
         run: uv pip install -e ".[tests]" --system
       - name: show-deps
diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 6bb570e536..790e4565b8 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -28,7 +28,7 @@ jobs:
           cache-dependency-glob: "pyproject.toml"
       - name: install-reqs
         # Python3.8 is technically at end-of-life, so we don't test everything
-        run: uv pip install -e ".[tests, core]" --system
+        run: uv pip install -e ".[tests, core]" backports.zoneinfo --system
       - name: show-deps
         run: uv pip freeze
       - name: Run pytest
diff --git a/narwhals/_duckdb/expr_str.py b/narwhals/_duckdb/expr_str.py
index b7804662f4..6f29898494 100644
--- a/narwhals/_duckdb/expr_str.py
+++ b/narwhals/_duckdb/expr_str.py
@@ -108,7 +108,7 @@ def replace(self: Self, pattern: str, value: str, *, literal: bool, n: int) -> N
 
     def to_datetime(self: Self, format: str | None) -> DuckDBExpr:  # noqa: A002
         if format is None:
-            msg = "Cannot infer format with DuckDB backend"
+            msg = "Cannot infer format with DuckDB backend, please specify `format` explicitly."
             raise NotImplementedError(msg)
 
         return self._compliant_expr._from_call(
diff --git a/narwhals/_duckdb/utils.py b/narwhals/_duckdb/utils.py
index 5fa205279e..bffccac4f5 100644
--- a/narwhals/_duckdb/utils.py
+++ b/narwhals/_duckdb/utils.py
@@ -83,6 +83,10 @@ def native_to_narwhals_dtype(duckdb_dtype: str, version: Version) -> DType:
         return dtypes.Date()
     if duckdb_dtype == "TIMESTAMP":
         return dtypes.Datetime()
+    if duckdb_dtype == "TIMESTAMP WITH TIME ZONE":
+        # TODO(marco): is UTC correct, or should we be getting the connection timezone?
+        # https://github.com/narwhals-dev/narwhals/issues/2165
+        return dtypes.Datetime(time_zone="UTC")
     if duckdb_dtype == "BOOLEAN":
         return dtypes.Boolean()
     if duckdb_dtype == "INTERVAL":
diff --git a/narwhals/_pandas_like/series_str.py b/narwhals/_pandas_like/series_str.py
index 5fae5867e3..6e75908489 100644
--- a/narwhals/_pandas_like/series_str.py
+++ b/narwhals/_pandas_like/series_str.py
@@ -85,11 +85,38 @@ def split(self: Self, by: str) -> PandasLikeSeries:
         )
 
     def to_datetime(self: Self, format: str | None) -> PandasLikeSeries:  # noqa: A002
-        return self._compliant_series._from_native_series(
-            to_datetime(self._compliant_series._implementation)(
-                self._compliant_series._native_series, format=format
-            )
-        )
+        """Parse the strings in this Series into datetimes.
+
+        Arguments:
+            format: Format to parse the strings with, or `None` to let the
+                native `to_datetime` infer it.
+
+        Returns:
+            A new Series. Timezone-aware results are always converted to UTC.
+        """
+        series = self._compliant_series
+        native = series._native_series
+        if format is not None and any(x in format for x in ("%z", "Z")):
+            # We know that the inputs are timezone-aware, so we can directly pass
+            # `utc=True` for better performance.
+            return series._from_native_series(
+                to_datetime(series._implementation, utc=True)(native, format=format)
+            )
+        result = series._from_native_series(
+            to_datetime(series._implementation, utc=False)(native, format=format)
+        )
+        if not hasattr(result.dtype, "time_zone"):
+            # The parse did not yield a datetime dtype, so there is no `time_zone`
+            # to inspect. This presumably happens for strings with mixed UTC
+            # offsets on pandas versions that return object dtype when `utc=False`.
+            # Re-parse with `utc=True` instead of failing with an AttributeError.
+            return series._from_native_series(
+                to_datetime(series._implementation, utc=True)(native, format=format)
+            )
+        result_time_zone = result.dtype.time_zone  # type: ignore[attr-defined]
+        if result_time_zone is not None and result_time_zone != "UTC":
+            result = result.dt.convert_time_zone("UTC")
+        return result
 
     def to_uppercase(self: Self) -> PandasLikeSeries:
         return self._compliant_series._from_native_series(
diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py
index 51fa5108c8..a442c64029 100644
--- a/narwhals/_pandas_like/utils.py
+++ b/narwhals/_pandas_like/utils.py
@@ -1,9 +1,9 @@
 from __future__ import annotations
 
+import functools
 import re
 import warnings
 from contextlib import suppress
-from functools import lru_cache
 from typing import TYPE_CHECKING
 from typing import Any
 from typing import Iterable
@@ -356,7 +356,7 @@ def rename(
     return obj.rename(*args, **kwargs, copy=False)  # type: ignore[attr-defined]
 
 
-@lru_cache(maxsize=16)
+@functools.lru_cache(maxsize=16)
 def non_object_native_to_narwhals_dtype(dtype: str, version: Version) -> DType:
     dtypes = import_dtypes_module(version)
     if dtype in {"int64", "Int64", "Int64[pyarrow]", "int64[pyarrow]"}:
@@ -679,9 +679,11 @@ def align_series_full_broadcast(
     return reindexed
 
 
-def to_datetime(implementation: Implementation) -> Any:
+def to_datetime(implementation: Implementation, *, utc: bool) -> Any:
     if implementation in PANDAS_LIKE_IMPLEMENTATION:
-        return implementation.to_native_namespace().to_datetime
+        return functools.partial(
+            implementation.to_native_namespace().to_datetime, utc=utc
+        )
 
     else:  # pragma: no cover
         msg = f"Expected pandas-like implementation ({PANDAS_LIKE_IMPLEMENTATION}), found {implementation}"
diff --git a/narwhals/_spark_like/utils.py b/narwhals/_spark_like/utils.py
index 6332f35d47..c02a100c92 100644
--- a/narwhals/_spark_like/utils.py
+++ b/narwhals/_spark_like/utils.py
@@ -55,6 +55,8 @@ def native_to_narwhals_dtype(
     if isinstance(dtype, native.TimestampNTZType):
         return dtypes.Datetime()
     if isinstance(dtype, native.TimestampType):
+        # TODO(marco): is UTC correct, or should we be getting the connection timezone?
+        # https://github.com/narwhals-dev/narwhals/issues/2165
         return dtypes.Datetime(time_zone="UTC")
     if isinstance(dtype, native.DecimalType):
         return dtypes.Decimal()
diff --git a/narwhals/expr_str.py b/narwhals/expr_str.py
index 9206009691..3b837779ba 100644
--- a/narwhals/expr_str.py
+++ b/narwhals/expr_str.py
@@ -370,6 +370,13 @@ def tail(self: Self, n: int = 5) -> ExprT:
     def to_datetime(self: Self, format: str | None = None) -> ExprT:  # noqa: A002
         """Convert to Datetime dtype.
 
+        Notes:
+            - pandas defaults to nanosecond time unit, Polars to microsecond.
+              Prior to pandas 2.0, nanoseconds were the only time unit supported
+              in pandas, with no ability to set any other one. Support for
+              setting the time unit, where the pandas version permits, is planned.
+            - Timezone-aware strings are parsed and then converted to UTC.
+
         Warning:
             As different backends auto-infer format in different ways, if `format=None`
             there is no guarantee that the result will be equal.
@@ -381,12 +388,6 @@ def to_datetime(self: Self, format: str | None = None) -> ExprT:  # noqa: A002
         Returns:
             A new expression.
 
-        Notes:
-            pandas defaults to nanosecond time unit, Polars to microsecond.
-            Prior to pandas 2.0, nanoseconds were the only time unit supported
-            in pandas, with no ability to set any other one. The ability to
-            set the time unit in pandas, if the version permits, will arrive.
-
         Examples:
             >>> import polars as pl
             >>> import narwhals as nw
diff --git a/narwhals/series_str.py b/narwhals/series_str.py
index 98e4280753..1169e0da55 100644
--- a/narwhals/series_str.py
+++ b/narwhals/series_str.py
@@ -377,10 +377,11 @@ def to_datetime(self: Self, format: str | None = None) -> SeriesT:  # noqa: A002
         """Parse Series with strings to a Series with Datetime dtype.
 
         Notes:
-            pandas defaults to nanosecond time unit, Polars to microsecond.
-            Prior to pandas 2.0, nanoseconds were the only time unit supported
-            in pandas, with no ability to set any other one. The ability to
-            set the time unit in pandas, if the version permits, will arrive.
+            - pandas defaults to nanosecond time unit, Polars to microsecond.
+              Prior to pandas 2.0, nanoseconds were the only time unit supported
+              in pandas, with no ability to set any other one. Support for
+              setting the time unit, where the pandas version permits, is planned.
+            - Timezone-aware strings are parsed and then converted to UTC.
 
         Warning:
             As different backends auto-infer format in different ways, if `format=None`
diff --git a/tests/expr_and_series/str/to_datetime_test.py b/tests/expr_and_series/str/to_datetime_test.py
index 24687af9ec..e5ac785095 100644
--- a/tests/expr_and_series/str/to_datetime_test.py
+++ b/tests/expr_and_series/str/to_datetime_test.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from contextlib import nullcontext as does_not_raise
 from datetime import datetime
 from datetime import timezone
 from typing import TYPE_CHECKING
@@ -9,7 +10,10 @@
 
 import narwhals.stable.v1 as nw
 from narwhals._arrow.utils import parse_datetime_format
+from tests.utils import PANDAS_VERSION
+from tests.utils import PYARROW_VERSION
 from tests.utils import assert_equal_data
+from tests.utils import is_pyarrow_windows_no_tzdata
 
 if TYPE_CHECKING:
     from tests.utils import Constructor
@@ -28,10 +32,16 @@ def test_to_datetime(constructor: Constructor) -> None:
         nw.from_native(constructor(data))
         .lazy()
         .select(b=nw.col("a").str.to_datetime(format="%Y-%m-%dT%H:%M:%S"))
-        .collect()
-        .item(row=0, column="b")
     )
-    assert str(result) == expected
+    result_schema = result.collect_schema()
+    assert isinstance(result_schema["b"], nw.Datetime)
+    if "sqlframe" in str(constructor):
+        # https://github.com/eakmanrq/sqlframe/issues/326
+        assert result_schema["b"].time_zone == "UTC"  # pyright: ignore[reportAttributeAccessIssue]
+    else:
+        assert result_schema["b"].time_zone is None  # pyright: ignore[reportAttributeAccessIssue]
+    result_item = result.collect().item(row=0, column="b")
+    assert str(result_item) == expected
 
 
 def test_to_datetime_series(constructor_eager: ConstructorEager) -> None:
@@ -190,3 +200,39 @@ def test_pyarrow_infer_datetime_raise_inconsistent_date_fmt(
 ) -> None:
     with pytest.raises(ValueError, match="Unable to infer datetime format. "):
         parse_datetime_format(pa.chunked_array([data]))
+
+
+@pytest.mark.parametrize("format", [None, "%Y-%m-%dT%H:%M:%S%z"])
+def test_to_datetime_tz_aware(
+    constructor: Constructor,
+    request: pytest.FixtureRequest,
+    format: str | None,  # noqa: A002
+) -> None:
+    """Check that timezone-aware strings are parsed into a Datetime column in UTC."""
+    backend = str(constructor)
+    if "pyarrow_table" in backend and PYARROW_VERSION < (13,):
+        # bugged on pyarrow older than 13
+        pytest.skip()
+    if "pandas" in backend and PANDAS_VERSION < (1,):
+        # "Cannot pass a tz argument when parsing strings with timezone information."
+        pytest.skip()
+    if is_pyarrow_windows_no_tzdata(constructor):
+        pytest.skip()
+    if "sqlframe" in backend:
+        # https://github.com/eakmanrq/sqlframe/issues/325
+        request.applymarker(pytest.mark.xfail)
+    # These backends are expected to raise when asked to infer the format.
+    cannot_infer = format is None and ("duckdb" in backend or "sqlframe" in backend)
+    context = pytest.raises(NotImplementedError) if cannot_infer else does_not_raise()
+    raw = "2020-01-01T01:02:03+0100"
+    df = nw.from_native(constructor({"a": [raw]}))
+    with context:
+        result = df.with_columns(b=nw.col("a").str.to_datetime(format))
+        declared_schema = result.collect_schema()
+        assert isinstance(declared_schema["b"], nw.Datetime)
+        collected_schema = result.lazy().collect().schema
+        assert collected_schema["a"] == nw.String
+        assert isinstance(collected_schema["b"], nw.Datetime)
+        # 01:02:03 at UTC+01:00 is 00:02:03 UTC.
+        parsed_utc = datetime(2020, 1, 1, 0, 2, 3, tzinfo=timezone.utc)
+        assert_equal_data(result, {"a": [raw], "b": [parsed_utc]})