Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ Other enhancements
- Added ``engine_kwargs`` parameter to :func:`read_excel` (:issue:`52214`)
- Classes that are useful for type-hinting have been added to the public API in the new submodule ``pandas.api.typing`` (:issue:`48577`)
- Implemented :attr:`Series.dt.is_month_start`, :attr:`Series.dt.is_month_end`, :attr:`Series.dt.is_year_start`, :attr:`Series.dt.is_year_end`, :attr:`Series.dt.is_quarter_start`, :attr:`Series.dt.is_quarter_end`, :attr:`Series.dt.days_in_month`, :attr:`Series.dt.unit`, :meth:`Series.dt.normalize`, :meth:`Series.dt.day_name`, :meth:`Series.dt.month_name`, :meth:`Series.dt.tz_convert` for :class:`ArrowDtype` with ``pyarrow.timestamp`` (:issue:`52388`, :issue:`51718`)
- Implemented :func:`api.interchange.from_dataframe` for :class:`DatetimeTZDtype` (:issue:`54239`)
- Implemented ``__from_arrow__`` on :class:`DatetimeTZDtype` (:issue:`52201`)
- Implemented ``__pandas_priority__`` to allow custom types to take precedence over :class:`DataFrame`, :class:`Series`, :class:`Index`, or :class:`ExtensionArray` for arithmetic operations, :ref:`see the developer guide <extending.pandas_priority>` (:issue:`48347`)
- Improved error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`)
Expand Down
13 changes: 11 additions & 2 deletions pandas/core/interchange/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@
from pandas.errors import NoBufferPresent
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.dtypes import ArrowDtype
from pandas.core.dtypes.dtypes import (
ArrowDtype,
DatetimeTZDtype,
)

import pandas as pd
from pandas.api.types import is_string_dtype
Expand Down Expand Up @@ -138,6 +141,8 @@ def _dtype_from_pandasdtype(self, dtype) -> tuple[DtypeKind, int, str, str]:
raise ValueError(f"Data type {dtype} not supported by interchange protocol")
if isinstance(dtype, ArrowDtype):
byteorder = dtype.numpy_dtype.byteorder
elif isinstance(dtype, DatetimeTZDtype):
byteorder = dtype.base.byteorder # type: ignore[union-attr]
else:
byteorder = dtype.byteorder

Expand Down Expand Up @@ -269,7 +274,11 @@ def _get_data_buffer(
DtypeKind.BOOL,
DtypeKind.DATETIME,
):
buffer = PandasBuffer(self._col.to_numpy(), allow_copy=self._allow_copy)
if self.dtype[0] == DtypeKind.DATETIME and len(self.dtype[2]) > 4:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so anything longer than 'tsn:' will go here (like tss:US/Pacific), nice

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah correct. Added a comment to that effect

np_arr = self._col.dt.tz_convert(None).to_numpy()
else:
np_arr = self._col.to_numpy()
buffer = PandasBuffer(np_arr, allow_copy=self._allow_copy)
dtype = self.dtype
elif self.dtype[0] == DtypeKind.CATEGORICAL:
codes = self._col.values._codes
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/interchange/from_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,20 +325,20 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
return np.asarray(str_list, dtype="object"), buffers


def parse_datetime_format_str(format_str, data):
def parse_datetime_format_str(format_str, data) -> pd.Series | np.ndarray:
"""Parse datetime `format_str` to interpret the `data`."""
# timestamp 'ts{unit}:tz'
timestamp_meta = re.match(r"ts([smun]):(.*)", format_str)
if timestamp_meta:
unit, tz = timestamp_meta.group(1), timestamp_meta.group(2)
if tz != "":
raise NotImplementedError("Timezones are not supported yet")
if unit != "s":
# the format string describes only a first letter of the unit, so
# add one extra letter to convert the unit to numpy-style:
# 'm' -> 'ms', 'u' -> 'us', 'n' -> 'ns'
unit += "s"
data = data.astype(f"datetime64[{unit}]")
if tz != "":
data = pd.Series(data).dt.tz_localize("UTC").dt.tz_convert(tz)
return data

# date 'td{Days/Ms}'
Expand All @@ -358,7 +358,7 @@ def parse_datetime_format_str(format_str, data):
raise NotImplementedError(f"DateTime kind is not supported: {format_str}")


def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray | pd.Series, Any]:
"""
Convert a column holding DateTime data to a NumPy array.

Expand Down Expand Up @@ -389,7 +389,7 @@ def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
length=col.size(),
)

data = parse_datetime_format_str(format_str, data)
data = parse_datetime_format_str(format_str, data) # type: ignore[assignment]
data = set_nulls(data, col, buffers["validity"])
return data, buffers

Expand Down
9 changes: 6 additions & 3 deletions pandas/core/interchange/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from __future__ import annotations

import re
import typing

import numpy as np
Expand All @@ -14,6 +13,7 @@
from pandas.core.dtypes.dtypes import (
ArrowDtype,
CategoricalDtype,
DatetimeTZDtype,
)

if typing.TYPE_CHECKING:
Expand Down Expand Up @@ -134,10 +134,13 @@ def dtype_to_arrow_c_fmt(dtype: DtypeObj) -> str:

if lib.is_np_dtype(dtype, "M"):
# Selecting the first char of resolution string:
# dtype.str -> '<M8[ns]'
resolution = re.findall(r"\[(.*)\]", dtype.str)[0][:1]
# dtype.str -> '<M8[ns]' -> 'n'
resolution = np.datetime_data(dtype)[0][0]
return ArrowCTypes.TIMESTAMP.format(resolution=resolution, tz="")

elif isinstance(dtype, DatetimeTZDtype):
return ArrowCTypes.TIMESTAMP.format(resolution=dtype.unit[0], tz=dtype.tz)

raise NotImplementedError(
f"Conversion of {dtype} to Arrow C format string is not implemented."
)
11 changes: 11 additions & 0 deletions pandas/tests/interchange/test_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,3 +284,14 @@ def test_empty_pyarrow(data):
arrow_df = pa_from_dataframe(expected)
result = from_dataframe(arrow_df)
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("tz", ["UTC", "US/Pacific"])
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
def test_datetimetzdtype(tz, unit):
    """Round-trip a tz-aware datetime column through the interchange protocol.

    Builds five daily timestamps localized to ``tz`` at resolution ``unit``,
    exports the frame via ``__dataframe__`` and checks that ``from_dataframe``
    reconstructs an equal frame.
    """
    # GH 54239
    naive_index = pd.date_range("2018-01-01", periods=5, freq="D")
    aware_index = naive_index.tz_localize(tz).as_unit(unit)
    expected = pd.DataFrame({"ts_tz": aware_index})
    round_tripped = from_dataframe(expected.__dataframe__())
    tm.assert_frame_equal(expected, round_tripped)