Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement DatetimeIndex.round(), DatetimeIndex.floor(), and DatetimeIndex.ceil() #2082

Merged
Merged
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions databricks/koalas/indexes/datetimes.py
Original file line number Diff line number Diff line change
@@ -431,3 +431,99 @@ def days_in_month(self) -> Index:
return Index(self.to_series().dt.days_in_month)

days_in_month.__doc__ = daysinmonth.__doc__

# Methods
def ceil(self, freq, *args, **kwargs) -> "DatetimeIndex":
"""
Perform ceil operation on the data to the specified freq.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: floor -> ceil ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch! Thanks!


Parameters
----------
freq : str or Offset
The frequency level to ceil the index to. Must be a fixed
frequency like 'S' (second) not 'ME' (month end).

Returns
-------
DatetimeIndex

Raises
------
ValueError if the `freq` cannot be converted.
ValueError if the `freq` is 'N' or 'ns' (nanoseconds is not supported).

Examples
--------
>>> rng = ks.date_range('1/1/2018 11:59:00', periods=3, freq='min')
>>> rng.ceil('H') # doctest: +NORMALIZE_WHITESPACE
DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00',
'2018-01-01 13:00:00'],
dtype='datetime64[ns]', freq=None)
"""
disallow_nanoseconds(freq)

return DatetimeIndex(self.to_series().dt.ceil(freq, *args, **kwargs))

def floor(self, freq, *args, **kwargs) -> "DatetimeIndex":
    """
    Floor the values of the index to the specified freq.

    Parameters
    ----------
    freq : str or Offset
        The frequency level to floor the index to. It has to be a fixed
        frequency such as 'S' (second); 'ME' (month end) is not one.
    *args, **kwargs
        Forwarded unchanged to ``dt.floor`` of the series built from
        this index.

    Returns
    -------
    DatetimeIndex

    Raises
    ------
    ValueError
        If the `freq` cannot be converted, or if it is rejected by
        ``disallow_nanoseconds``.

    Examples
    --------
    >>> rng = ks.date_range('1/1/2018 11:59:00', periods=3, freq='min')
    >>> rng.floor("H")  # doctest: +NORMALIZE_WHITESPACE
    DatetimeIndex(['2018-01-01 11:00:00', '2018-01-01 12:00:00',
                   '2018-01-01 12:00:00'],
                  dtype='datetime64[ns]', freq=None)
    """
    # Validate first so an unsupported frequency fails before any work is done.
    disallow_nanoseconds(freq)

    floored = self.to_series().dt.floor(freq, *args, **kwargs)
    return DatetimeIndex(floored)

def round(self, freq, *args, **kwargs) -> "DatetimeIndex":
    """
    Round the values of the index to the specified freq.

    Parameters
    ----------
    freq : str or Offset
        The frequency level to round the index to. It has to be a fixed
        frequency such as 'S' (second); 'ME' (month end) is not one.
    *args, **kwargs
        Forwarded unchanged to ``dt.round`` of the series built from
        this index.

    Returns
    -------
    DatetimeIndex

    Raises
    ------
    ValueError
        If the `freq` cannot be converted, or if it is rejected by
        ``disallow_nanoseconds``.

    Examples
    --------
    >>> rng = ks.date_range('1/1/2018 11:59:00', periods=3, freq='min')
    >>> rng.round("H")  # doctest: +NORMALIZE_WHITESPACE
    DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00',
                   '2018-01-01 12:00:00'],
                  dtype='datetime64[ns]', freq=None)
    """
    # Validate first so an unsupported frequency fails before any work is done.
    disallow_nanoseconds(freq)

    rounded = self.to_series().dt.round(freq, *args, **kwargs)
    return DatetimeIndex(rounded)


def disallow_nanoseconds(freq):
    """
    Reject the nanosecond frequency aliases.

    Parameters
    ----------
    freq : object
        Frequency to check; it is compared for equality against the
        aliases 'N' and 'ns'.

    Raises
    ------
    ValueError
        If `freq` equals 'N' or 'ns'.
    """
    # A list (not a set) keeps the check equality-based, so the argument
    # does not need to be hashable.
    nanosecond_aliases = ["N", "ns"]
    if freq not in nanosecond_aliases:
        return
    raise ValueError("nanoseconds is not supported")
3 changes: 0 additions & 3 deletions databricks/koalas/missing/indexes.py
Original file line number Diff line number Diff line change
@@ -117,9 +117,6 @@ class MissingPandasLikeDatetimeIndex(MissingPandasLikeIndex):
snap = _unsupported_function("snap", cls="DatetimeIndex")
tz_convert = _unsupported_function("tz_convert", cls="DatetimeIndex")
tz_localize = _unsupported_function("tz_localize", cls="DatetimeIndex")
round = _unsupported_function("round", cls="DatetimeIndex")
floor = _unsupported_function("floor", cls="DatetimeIndex")
ceil = _unsupported_function("ceil", cls="DatetimeIndex")
to_period = _unsupported_function("to_period", cls="DatetimeIndex")
to_perioddelta = _unsupported_function("to_perioddelta", cls="DatetimeIndex")
to_pydatetime = _unsupported_function("to_pydatetime", cls="DatetimeIndex")
25 changes: 25 additions & 0 deletions databricks/koalas/tests/indexes/test_datetime.py
Original file line number Diff line number Diff line change
@@ -57,6 +57,10 @@ def kidxs(self):
def idx_pairs(self):
return list(zip(self.kidxs, self.pidxs))

def _disallow_nanoseconds(self, f):
    """Assert that calling `f` with freq "ns" and then freq "N" raises ValueError."""
    for nanosecond_alias in ("ns", "N"):
        with self.assertRaises(ValueError):
            f(freq=nanosecond_alias)

def test_properties(self):
for kidx, pidx in self.idx_pairs:
self.assert_eq(kidx.year, pidx.year)
@@ -85,3 +89,24 @@ def test_properties(self):
if LooseVersion(pd.__version__) >= LooseVersion("1.2.0"):
self.assert_eq(kidx.day_of_year, pidx.day_of_year)
self.assert_eq(kidx.day_of_week, pidx.day_of_week)

def test_ceil(self):
    """Compare ``ceil`` on each Koalas/pandas index pair for every fixed frequency."""
    for kidx, pidx in self.idx_pairs:
        for freq in self.fixed_freqs:
            actual = kidx.ceil(freq)
            expected = pidx.ceil(freq)
            self.assert_eq(actual, expected)

    # The nanosecond aliases must be refused by the Koalas implementation.
    first_kidx = self.kidxs[0]
    self._disallow_nanoseconds(first_kidx.ceil)

def test_floor(self):
    """Compare ``floor`` on each Koalas/pandas index pair for every fixed frequency."""
    for kidx, pidx in self.idx_pairs:
        for freq in self.fixed_freqs:
            actual = kidx.floor(freq)
            expected = pidx.floor(freq)
            self.assert_eq(actual, expected)

    # The nanosecond aliases must be refused by the Koalas implementation.
    first_kidx = self.kidxs[0]
    self._disallow_nanoseconds(first_kidx.floor)

def test_round(self):
    """Compare ``round`` on each Koalas/pandas index pair for every fixed frequency."""
    for kidx, pidx in self.idx_pairs:
        for freq in self.fixed_freqs:
            actual = kidx.round(freq)
            expected = pidx.round(freq)
            self.assert_eq(actual, expected)

    # The nanosecond aliases must be refused by the Koalas implementation.
    first_kidx = self.kidxs[0]
    self._disallow_nanoseconds(first_kidx.round)
9 changes: 9 additions & 0 deletions docs/source/reference/indexing.rst
Original file line number Diff line number Diff line change
@@ -323,3 +323,12 @@ Time/date components
DatetimeIndex.is_leap_year
DatetimeIndex.daysinmonth
DatetimeIndex.days_in_month

Time-specific operations
~~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
:toctree: api/

DatetimeIndex.round
DatetimeIndex.floor
DatetimeIndex.ceil