Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement DatetimeIndex.round(), DatetimeIndex.floor(), and DatetimeIndex.ceil() #2082

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions databricks/koalas/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,3 +431,99 @@ def days_in_month(self) -> Index:
return Index(self.to_series().dt.days_in_month)

days_in_month.__doc__ = daysinmonth.__doc__

# Methods
def ceil(self, freq, *args, **kwargs) -> "DatetimeIndex":
    """
    Round every timestamp in the index up to the specified freq.

    Parameters
    ----------
    freq : str or Offset
        The frequency level to ceil the index to. Must be a fixed
        frequency like 'S' (second) not 'ME' (month end).
    *args, **kwargs
        Passed through unchanged to the ``dt.ceil`` call on the
        series obtained from ``self.to_series()``.

    Returns
    -------
    DatetimeIndex

    Raises
    ------
    ValueError if the `freq` cannot be converted, or if it is rejected
    by ``disallow_nanoseconds`` (e.g. ``"N"`` or ``"ns"``).

    Examples
    --------
    >>> rng = ks.date_range('1/1/2018 11:59:00', periods=3, freq='min')
    >>> rng.ceil('H')  # doctest: +NORMALIZE_WHITESPACE
    DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00',
                   '2018-01-01 13:00:00'],
                  dtype='datetime64[ns]', freq=None)
    """
    # Validate before any series is built so bad input fails fast.
    disallow_nanoseconds(freq)

    ceiled = self.to_series().dt.ceil(freq, *args, **kwargs)
    return DatetimeIndex(ceiled)

def floor(self, freq, *args, **kwargs) -> "DatetimeIndex":
    """
    Round every timestamp in the index down to the specified freq.

    Parameters
    ----------
    freq : str or Offset
        The frequency level to floor the index to. Must be a fixed
        frequency like 'S' (second) not 'ME' (month end).
    *args, **kwargs
        Passed through unchanged to the ``dt.floor`` call on the
        series obtained from ``self.to_series()``.

    Returns
    -------
    DatetimeIndex

    Raises
    ------
    ValueError if the `freq` cannot be converted, or if it is rejected
    by ``disallow_nanoseconds`` (e.g. ``"N"`` or ``"ns"``).

    Examples
    --------
    >>> rng = ks.date_range('1/1/2018 11:59:00', periods=3, freq='min')
    >>> rng.floor("H")  # doctest: +NORMALIZE_WHITESPACE
    DatetimeIndex(['2018-01-01 11:00:00', '2018-01-01 12:00:00',
                   '2018-01-01 12:00:00'],
                  dtype='datetime64[ns]', freq=None)
    """
    # Validate before any series is built so bad input fails fast.
    disallow_nanoseconds(freq)

    floored = self.to_series().dt.floor(freq, *args, **kwargs)
    return DatetimeIndex(floored)

def round(self, freq, *args, **kwargs) -> "DatetimeIndex":
    """
    Round every timestamp in the index to the specified freq.

    Parameters
    ----------
    freq : str or Offset
        The frequency level to round the index to. Must be a fixed
        frequency like 'S' (second) not 'ME' (month end).
    *args, **kwargs
        Passed through unchanged to the ``dt.round`` call on the
        series obtained from ``self.to_series()``.

    Returns
    -------
    DatetimeIndex

    Raises
    ------
    ValueError if the `freq` cannot be converted, or if it is rejected
    by ``disallow_nanoseconds`` (e.g. ``"N"`` or ``"ns"``).

    Examples
    --------
    >>> rng = ks.date_range('1/1/2018 11:59:00', periods=3, freq='min')
    >>> rng.round("H")  # doctest: +NORMALIZE_WHITESPACE
    DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00',
                   '2018-01-01 12:00:00'],
                  dtype='datetime64[ns]', freq=None)
    """
    # Validate before any series is built so bad input fails fast.
    disallow_nanoseconds(freq)

    rounded = self.to_series().dt.round(freq, *args, **kwargs)
    return DatetimeIndex(rounded)


def disallow_nanoseconds(freq):
    """
    Reject frequencies expressed in nanoseconds.

    Besides the bare aliases ``"N"`` and ``"ns"``, multiples such as
    ``"5ns"`` or ``"2N"`` and ``pandas`` ``Nano`` offsets of any size are
    rejected as well, so a nanosecond frequency cannot slip past the check
    merely by carrying a multiplier.

    Parameters
    ----------
    freq : str or Offset
        The frequency to validate.

    Raises
    ------
    ValueError
        If `freq` denotes a nanosecond frequency.
    """
    # Local imports keep this helper self-contained regardless of what the
    # module imports at the top.
    import re
    from pandas.tseries.offsets import Nano

    if isinstance(freq, str):
        # Optional sign and (possibly fractional) multiplier, then the alias.
        is_nano = re.fullmatch(r"\s*[+-]?\d*\.?\d*\s*(?:N|ns)\s*", freq) is not None
    else:
        is_nano = isinstance(freq, Nano)

    if is_nano:
        raise ValueError("nanoseconds is not supported")
3 changes: 0 additions & 3 deletions databricks/koalas/missing/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,6 @@ class MissingPandasLikeDatetimeIndex(MissingPandasLikeIndex):
snap = _unsupported_function("snap", cls="DatetimeIndex")
tz_convert = _unsupported_function("tz_convert", cls="DatetimeIndex")
tz_localize = _unsupported_function("tz_localize", cls="DatetimeIndex")
round = _unsupported_function("round", cls="DatetimeIndex")
floor = _unsupported_function("floor", cls="DatetimeIndex")
ceil = _unsupported_function("ceil", cls="DatetimeIndex")
to_period = _unsupported_function("to_period", cls="DatetimeIndex")
to_perioddelta = _unsupported_function("to_perioddelta", cls="DatetimeIndex")
to_pydatetime = _unsupported_function("to_pydatetime", cls="DatetimeIndex")
Expand Down
25 changes: 25 additions & 0 deletions databricks/koalas/tests/indexes/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ def kidxs(self):
def idx_pairs(self):
return list(zip(self.kidxs, self.pidxs))

def _disallow_nanoseconds(self, f):
    """Assert that ``f`` raises ValueError for each nanosecond ``freq`` alias."""
    for nano_alias in ("ns", "N"):
        self.assertRaises(ValueError, f, freq=nano_alias)

def test_properties(self):
for kidx, pidx in self.idx_pairs:
self.assert_eq(kidx.year, pidx.year)
Expand Down Expand Up @@ -85,3 +89,24 @@ def test_properties(self):
if LooseVersion(pd.__version__) >= LooseVersion("1.2.0"):
self.assert_eq(kidx.day_of_year, pidx.day_of_year)
self.assert_eq(kidx.day_of_week, pidx.day_of_week)

def test_ceil(self):
    """Compare ``ceil`` across every index pair and fixed frequency."""
    for koalas_idx, pandas_idx in self.idx_pairs:
        for fixed_freq in self.fixed_freqs:
            actual = koalas_idx.ceil(fixed_freq)
            expected = pandas_idx.ceil(fixed_freq)
            self.assert_eq(actual, expected)

    # Nanosecond frequencies must be rejected rather than rounded.
    first_kidx = self.kidxs[0]
    self._disallow_nanoseconds(first_kidx.ceil)

def test_floor(self):
    """Compare ``floor`` across every index pair and fixed frequency."""
    for koalas_idx, pandas_idx in self.idx_pairs:
        for fixed_freq in self.fixed_freqs:
            actual = koalas_idx.floor(fixed_freq)
            expected = pandas_idx.floor(fixed_freq)
            self.assert_eq(actual, expected)

    # Nanosecond frequencies must be rejected rather than rounded.
    first_kidx = self.kidxs[0]
    self._disallow_nanoseconds(first_kidx.floor)

def test_round(self):
    """Compare ``round`` across every index pair and fixed frequency."""
    for koalas_idx, pandas_idx in self.idx_pairs:
        for fixed_freq in self.fixed_freqs:
            actual = koalas_idx.round(fixed_freq)
            expected = pandas_idx.round(fixed_freq)
            self.assert_eq(actual, expected)

    # Nanosecond frequencies must be rejected rather than rounded.
    first_kidx = self.kidxs[0]
    self._disallow_nanoseconds(first_kidx.round)
9 changes: 9 additions & 0 deletions docs/source/reference/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -323,3 +323,12 @@ Time/date components
DatetimeIndex.is_leap_year
DatetimeIndex.daysinmonth
DatetimeIndex.days_in_month

Time-specific operations
~~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
:toctree: api/

DatetimeIndex.round
DatetimeIndex.floor
DatetimeIndex.ceil