Skip to content

Commit

Permalink
Add properties for DatetimeIndex (#2074)
Browse files Browse the repository at this point in the history
Add properties for DatetimeIndex
  • Loading branch information
xinrong-meng authored Mar 3, 2021
1 parent e6f90e8 commit 66d3c1b
Show file tree
Hide file tree
Showing 4 changed files with 363 additions and 22 deletions.
295 changes: 295 additions & 0 deletions databricks/koalas/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,298 @@ def __getattr__(self, item: str) -> Any:
else:
return partial(property_or_func, self)
raise AttributeError("'DatetimeIndex' object has no attribute '{}'".format(item))

# Properties
@property
def year(self) -> Index:
    """
    Year component of each datetime in the index.
    """
    as_series = self.to_series()
    return Index(as_series.dt.year)

@property
def month(self) -> Index:
    """
    Month of each timestamp, numbered from January = 1 to December = 12.
    """
    as_series = self.to_series()
    return Index(as_series.dt.month)

@property
def day(self) -> Index:
    """
    Day component of each datetime in the index.
    """
    as_series = self.to_series()
    return Index(as_series.dt.day)

@property
def hour(self) -> Index:
    """
    Hour component of each datetime in the index.
    """
    as_series = self.to_series()
    return Index(as_series.dt.hour)

@property
def minute(self) -> Index:
    """
    Minute component of each datetime in the index.
    """
    as_series = self.to_series()
    return Index(as_series.dt.minute)

@property
def second(self) -> Index:
    """
    Second component of each datetime in the index.
    """
    as_series = self.to_series()
    return Index(as_series.dt.second)

@property
def microsecond(self) -> Index:
    """
    Microsecond component of each datetime in the index.
    """
    as_series = self.to_series()
    return Index(as_series.dt.microsecond)

@property
def week(self) -> Index:
    """
    Ordinal of the week within the year.
    """
    as_series = self.to_series()
    return Index(as_series.dt.week)

@property
def weekofyear(self) -> Index:
    # Documented via ``week.__doc__`` (assigned right after this definition).
    as_series = self.to_series()
    return Index(as_series.dt.weekofyear)

# Reuse the docstring of ``week`` for this property.
weekofyear.__doc__ = week.__doc__

@property
def dayofweek(self) -> Index:
    """
    The day of the week with Monday=0, Sunday=6.

    Return the day of the week. The week is assumed to start on Monday,
    denoted by 0, and to end on Sunday, denoted by 6. This is available
    both on Series with datetime values (through the `dt` accessor) and
    on DatetimeIndex.

    Returns
    -------
    Index
        Containing integers indicating the day number.

    See Also
    --------
    Series.dt.dayofweek : Alias.
    Series.dt.weekday : Alias.
    Series.dt.day_name : Returns the name of the day of the week.

    Examples
    --------
    >>> idx = ks.from_pandas(pd.date_range('2016-12-31', '2017-01-08', freq='D'))
    >>> idx.dayofweek
    Int64Index([5, 6, 0, 1, 2, 3, 4, 5, 6], dtype='int64')
    """
    as_series = self.to_series()
    return Index(as_series.dt.dayofweek)

@property
def day_of_week(self) -> Index:
    # Alias: forwards to ``dayofweek`` and shares its docstring (set below).
    day_numbers = self.dayofweek
    return day_numbers

# Reuse the docstring of ``dayofweek`` for this alias.
day_of_week.__doc__ = dayofweek.__doc__

@property
def weekday(self) -> Index:
    # Documented via ``dayofweek.__doc__`` (assigned right after this definition).
    as_series = self.to_series()
    return Index(as_series.dt.weekday)

# Reuse the docstring of ``dayofweek`` for this property.
weekday.__doc__ = dayofweek.__doc__

@property
def dayofyear(self) -> Index:
    """
    Ordinal of the day within the year.
    """
    as_series = self.to_series()
    return Index(as_series.dt.dayofyear)

@property
def day_of_year(self) -> Index:
    # Alias: forwards to ``dayofyear`` and shares its docstring (set below).
    day_ordinals = self.dayofyear
    return day_ordinals

# Reuse the docstring of ``dayofyear`` for this alias.
day_of_year.__doc__ = dayofyear.__doc__

@property
def quarter(self) -> Index:
    """
    Quarter of the year that each date falls in.
    """
    as_series = self.to_series()
    return Index(as_series.dt.quarter)

@property
def is_month_start(self) -> Index:
    """
    Indicates whether the date is the first day of the month.

    Returns
    -------
    Index
        Returns an Index with boolean values.

    See Also
    --------
    is_month_end : Return a boolean indicating whether the date
        is the last day of the month.

    Examples
    --------
    >>> idx = ks.from_pandas(pd.date_range("2018-02-27", periods=3))
    >>> idx.is_month_start
    Index([False, False, True], dtype='object')
    """
    as_series = self.to_series()
    return Index(as_series.dt.is_month_start)

@property
def is_month_end(self) -> Index:
    """
    Indicates whether the date is the last day of the month.

    Returns
    -------
    Index
        Returns an Index with boolean values.

    See Also
    --------
    is_month_start : Return a boolean indicating whether the date
        is the first day of the month.

    Examples
    --------
    >>> idx = ks.from_pandas(pd.date_range("2018-02-27", periods=3))
    >>> idx.is_month_end
    Index([False, True, False], dtype='object')
    """
    as_series = self.to_series()
    return Index(as_series.dt.is_month_end)

@property
def is_quarter_start(self) -> Index:
    """
    Indicator for whether the date is the first day of a quarter.

    Returns
    -------
    is_quarter_start : Index
        Returns an Index with boolean values.

    See Also
    --------
    quarter : Return the quarter of the date.
    is_quarter_end : Similar property for indicating the quarter end.

    Examples
    --------
    >>> idx = ks.from_pandas(pd.date_range('2017-03-30', periods=4))
    >>> idx.is_quarter_start
    Index([False, False, True, False], dtype='object')
    """
    # The per-element flag is computed by the Series ``dt`` accessor;
    # wrapping the result in ``Index`` turns it back into an index.
    as_series = self.to_series()
    return Index(as_series.dt.is_quarter_start)

@property
def is_quarter_end(self) -> Index:
    """
    Indicator for whether the date is the last day of a quarter.

    Returns
    -------
    is_quarter_end : Index
        Returns an Index with boolean values.

    See Also
    --------
    quarter : Return the quarter of the date.
    is_quarter_start : Similar property indicating the quarter start.

    Examples
    --------
    >>> idx = ks.from_pandas(pd.date_range('2017-03-30', periods=4))
    >>> idx.is_quarter_end
    Index([False, True, False, False], dtype='object')
    """
    as_series = self.to_series()
    return Index(as_series.dt.is_quarter_end)

@property
def is_year_start(self) -> Index:
    """
    Indicate whether the date is the first day of a year.

    Returns
    -------
    Index
        Returns an Index with boolean values.

    See Also
    --------
    is_year_end : Similar property indicating the last day of the year.

    Examples
    --------
    >>> idx = ks.from_pandas(pd.date_range("2017-12-30", periods=3))
    >>> idx.is_year_start
    Index([False, False, True], dtype='object')
    """
    as_series = self.to_series()
    return Index(as_series.dt.is_year_start)

@property
def is_year_end(self) -> Index:
    """
    Indicate whether the date is the last day of the year.

    Returns
    -------
    Index
        Returns an Index with boolean values.

    See Also
    --------
    is_year_start : Similar property indicating the start of the year.

    Examples
    --------
    >>> idx = ks.from_pandas(pd.date_range("2017-12-30", periods=3))
    >>> idx.is_year_end
    Index([False, True, False], dtype='object')
    """
    as_series = self.to_series()
    return Index(as_series.dt.is_year_end)

@property
def is_leap_year(self) -> Index:
    """
    Boolean indicator if the date belongs to a leap year.

    A leap year is a year with 366 days (instead of 365), the extra
    intercalary day being the 29th of February. Leap years are the
    multiples of four, except for years divisible by 100 but not by 400.

    Returns
    -------
    Index
        Booleans indicating if dates belong to a leap year.

    Examples
    --------
    >>> idx = ks.from_pandas(pd.date_range("2012-01-01", "2015-01-01", freq="Y"))
    >>> idx.is_leap_year
    Index([True, False, False], dtype='object')
    """
    as_series = self.to_series()
    return Index(as_series.dt.is_leap_year)

@property
def daysinmonth(self) -> Index:
    """
    Number of days in the month of each date.
    """
    as_series = self.to_series()
    return Index(as_series.dt.daysinmonth)

@property
def days_in_month(self) -> Index:
    # Documented via ``daysinmonth.__doc__`` (assigned right after this definition).
    as_series = self.to_series()
    return Index(as_series.dt.days_in_month)

# Reuse the docstring of ``daysinmonth`` for this property.
days_in_month.__doc__ = daysinmonth.__doc__
22 changes: 0 additions & 22 deletions databricks/koalas/missing/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,35 +100,13 @@ class MissingPandasLikeIndex(object):
class MissingPandasLikeDatetimeIndex(MissingPandasLikeIndex):

# Properties
year = _unsupported_property("year", cls="DatetimeIndex")
month = _unsupported_property("month", cls="DatetimeIndex")
day = _unsupported_property("day", cls="DatetimeIndex")
hour = _unsupported_property("hour", cls="DatetimeIndex")
minute = _unsupported_property("minute", cls="DatetimeIndex")
second = _unsupported_property("second", cls="DatetimeIndex")
microsecond = _unsupported_property("microsecond", cls="DatetimeIndex")
nanosecond = _unsupported_property("nanosecond", cls="DatetimeIndex")
date = _unsupported_property("date", cls="DatetimeIndex")
time = _unsupported_property("time", cls="DatetimeIndex")
timetz = _unsupported_property("timetz", cls="DatetimeIndex")
dayofyear = _unsupported_property("dayofyear", cls="DatetimeIndex")
day_of_year = _unsupported_property("day_of_year", cls="DatetimeIndex")
weekofyear = _unsupported_property("weekofyear", cls="DatetimeIndex")
week = _unsupported_property("week", cls="DatetimeIndex")
dayofweek = _unsupported_property("dayofweek", cls="DatetimeIndex")
day_of_week = _unsupported_property("day_of_week", cls="DatetimeIndex")
weekday = _unsupported_property("weekday", cls="DatetimeIndex")
quarter = _unsupported_property("quarter", cls="DatetimeIndex")
tz = _unsupported_property("tz", cls="DatetimeIndex")
freq = _unsupported_property("freq", cls="DatetimeIndex")
freqstr = _unsupported_property("freqstr", cls="DatetimeIndex")
is_month_start = _unsupported_property("is_month_start", cls="DatetimeIndex")
is_month_end = _unsupported_property("is_month_end", cls="DatetimeIndex")
is_quarter_start = _unsupported_property("is_quarter_start", cls="DatetimeIndex")
is_quarter_end = _unsupported_property("is_quarter_end", cls="DatetimeIndex")
is_year_start = _unsupported_property("is_year_start", cls="DatetimeIndex")
is_year_end = _unsupported_property("is_year_end", cls="DatetimeIndex")
is_leap_year = _unsupported_property("is_leap_year", cls="DatetimeIndex")
inferred_freq = _unsupported_property("inferred_freq", cls="DatetimeIndex")

# Functions
Expand Down
38 changes: 38 additions & 0 deletions databricks/koalas/tests/test_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,44 @@ def test_index_unique(self):
with self.assertRaisesRegex(KeyError, "Requested level (hi)*"):
kidx.unique(level="hi")

def test_datetime_index_properties(self):
    """
    Compare DatetimeIndex properties of a Koalas index against pandas.

    Each pandas index is converted with ``ks.from_pandas`` and every
    property is checked against the pandas value for the same index.
    """
    explicit_indexes = [
        pd.DatetimeIndex([0]),
        pd.DatetimeIndex(["2004-01-01", "2002-12-31", "2000-04-01"]),
    ]
    # NOTE(review): "m" may be parsed by pandas as a month-based frequency
    # rather than minutes — confirm this is the intended alias.
    ranged_indexes = [
        pd.date_range("2000-01-01", periods=3, freq=unit)
        for unit in ["ns", "us", "ms", "s", "m", "h", "D"]
    ]

    # Properties compared directly against the pandas attribute.
    direct_props = [
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "microsecond",
        "week",
        "weekofyear",
        "dayofweek",
        "weekday",
        "dayofyear",
        "quarter",
        "daysinmonth",
        "days_in_month",
    ]
    # Properties whose pandas value is wrapped in ``pd.Index`` before comparing.
    wrapped_props = [
        "is_month_start",
        "is_month_end",
        "is_quarter_start",
        "is_quarter_end",
        "is_year_start",
        "is_year_end",
        "is_leap_year",
    ]
    # These aliases are only compared when pandas is at least 1.2.0.
    check_new_aliases = LooseVersion(pd.__version__) >= LooseVersion("1.2.0")

    for pidx in explicit_indexes + ranged_indexes:
        kidx = ks.from_pandas(pidx)

        for prop in direct_props:
            self.assert_eq(getattr(kidx, prop), getattr(pidx, prop))

        for prop in wrapped_props:
            self.assert_eq(getattr(kidx, prop), pd.Index(getattr(pidx, prop)))

        if check_new_aliases:
            self.assert_eq(kidx.day_of_year, pidx.day_of_year)
            self.assert_eq(kidx.day_of_week, pidx.day_of_week)

def test_multi_index_copy(self):
arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
idx = pd.MultiIndex.from_arrays(arrays, names=("number", "color"))
Expand Down
Loading

0 comments on commit 66d3c1b

Please sign in to comment.