Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add properties for DatetimeIndex #2074

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
295 changes: 295 additions & 0 deletions databricks/koalas/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,298 @@ def __getattr__(self, item: str) -> Any:
else:
return partial(property_or_func, self)
raise AttributeError("'DatetimeIndex' object has no attribute '{}'".format(item))

# Properties
@property
def year(self) -> Index:
    """
    Year component of each datetime in the index.

    Returns
    -------
    Index
        Built from the ``dt.year`` values of ``self.to_series()``.
    """
    series = self.to_series()
    return Index(series.dt.year)

@property
def month(self) -> Index:
    """
    Month component of each timestamp, numbered January = 1 through
    December = 12.

    Returns
    -------
    Index
        Built from the ``dt.month`` values of ``self.to_series()``.
    """
    series = self.to_series()
    return Index(series.dt.month)

@property
def day(self) -> Index:
    """
    Day component of each datetime in the index.

    Returns
    -------
    Index
        Built from the ``dt.day`` values of ``self.to_series()``.
    """
    series = self.to_series()
    return Index(series.dt.day)

@property
def hour(self) -> Index:
    """
    Hour component of each datetime in the index.

    Returns
    -------
    Index
        Built from the ``dt.hour`` values of ``self.to_series()``.
    """
    series = self.to_series()
    return Index(series.dt.hour)

@property
def minute(self) -> Index:
    """
    Minute component of each datetime in the index.

    Returns
    -------
    Index
        Built from the ``dt.minute`` values of ``self.to_series()``.
    """
    series = self.to_series()
    return Index(series.dt.minute)

@property
def second(self) -> Index:
    """
    Second component of each datetime in the index.

    Returns
    -------
    Index
        Built from the ``dt.second`` values of ``self.to_series()``.
    """
    series = self.to_series()
    return Index(series.dt.second)

@property
def microsecond(self) -> Index:
    """
    Microsecond component of each datetime in the index.

    Returns
    -------
    Index
        Built from the ``dt.microsecond`` values of ``self.to_series()``.
    """
    series = self.to_series()
    return Index(series.dt.microsecond)

@property
def week(self) -> Index:
    """
    The week ordinal of the year.

    Returns
    -------
    Index
        Built from the ``dt.week`` values of ``self.to_series()``.
    """
    series = self.to_series()
    return Index(series.dt.week)

@property
def weekofyear(self) -> Index:
    # Reads ``dt.weekofyear`` directly; the docstring is copied from
    # ``week`` right after this definition.
    series = self.to_series()
    return Index(series.dt.weekofyear)

weekofyear.__doc__ = week.__doc__

@property
def dayofweek(self) -> Index:
    """
    The day of the week with Monday=0, Sunday=6.

    The week is taken to start on Monday, denoted by 0, and to end on
    Sunday, denoted by 6.

    Returns
    -------
    Index
        Containing integers indicating the day number.

    See Also
    --------
    Series.dt.dayofweek : Alias.
    Series.dt.weekday : Alias.
    Series.dt.day_name : Returns the name of the day of the week.

    Examples
    --------
    >>> idx = ks.from_pandas(pd.date_range('2016-12-31', '2017-01-08', freq='D'))
    >>> idx.dayofweek
    Int64Index([5, 6, 0, 1, 2, 3, 4, 5, 6], dtype='int64')
    """
    series = self.to_series()
    return Index(series.dt.dayofweek)

@property
def day_of_week(self) -> Index:
    # Delegates to ``dayofweek``; the docstring is copied from it below.
    return self.dayofweek

day_of_week.__doc__ = dayofweek.__doc__

@property
def weekday(self) -> Index:
    # Reads ``dt.weekday`` directly; the docstring is copied from
    # ``dayofweek`` below.
    series = self.to_series()
    return Index(series.dt.weekday)

weekday.__doc__ = dayofweek.__doc__

@property
def dayofyear(self) -> Index:
    """
    The ordinal day of the year.

    Returns
    -------
    Index
        Built from the ``dt.dayofyear`` values of ``self.to_series()``.
    """
    series = self.to_series()
    return Index(series.dt.dayofyear)

@property
def day_of_year(self) -> Index:
    # Delegates to ``dayofyear``; the docstring is copied from it below.
    return self.dayofyear

day_of_year.__doc__ = dayofyear.__doc__

@property
def quarter(self) -> Index:
    """
    The quarter of each date in the index.

    Returns
    -------
    Index
        Built from the ``dt.quarter`` values of ``self.to_series()``.
    """
    series = self.to_series()
    return Index(series.dt.quarter)

@property
def is_month_start(self) -> Index:
    """
    Indicates whether each date is the first day of its month.

    Returns
    -------
    Index
        An Index with boolean values.

    See Also
    --------
    is_month_end : Return a boolean indicating whether the date
        is the last day of the month.

    Examples
    --------
    >>> idx = ks.from_pandas(pd.date_range("2018-02-27", periods=3))
    >>> idx.is_month_start
    Index([False, False, True], dtype='object')
    """
    series = self.to_series()
    return Index(series.dt.is_month_start)

@property
def is_month_end(self) -> Index:
    """
    Indicates whether each date is the last day of its month.

    Returns
    -------
    Index
        An Index with boolean values.

    See Also
    --------
    is_month_start : Return a boolean indicating whether the date
        is the first day of the month.

    Examples
    --------
    >>> idx = ks.from_pandas(pd.date_range("2018-02-27", periods=3))
    >>> idx.is_month_end
    Index([False, True, False], dtype='object')
    """
    series = self.to_series()
    return Index(series.dt.is_month_end)

@property
def is_quarter_start(self) -> Index:
    """
    Indicator for whether each date is the first day of a quarter.

    Returns
    -------
    is_quarter_start : Index
        An Index with boolean values.

    See Also
    --------
    quarter : Return the quarter of the date.
    is_quarter_end : Similar property indicating the quarter end.

    Examples
    --------
    >>> idx = ks.from_pandas(pd.date_range('2017-03-30', periods=4))
    >>> idx.is_quarter_start
    Index([False, False, True, False], dtype='object')
    """
    series = self.to_series()
    return Index(series.dt.is_quarter_start)

@property
def is_quarter_end(self) -> Index:
    """
    Indicator for whether each date is the last day of a quarter.

    Returns
    -------
    is_quarter_end : Index
        An Index with boolean values.

    See Also
    --------
    quarter : Return the quarter of the date.
    is_quarter_start : Similar property indicating the quarter start.

    Examples
    --------
    >>> idx = ks.from_pandas(pd.date_range('2017-03-30', periods=4))
    >>> idx.is_quarter_end
    Index([False, True, False, False], dtype='object')
    """
    series = self.to_series()
    return Index(series.dt.is_quarter_end)

@property
def is_year_start(self) -> Index:
    """
    Indicate whether each date is the first day of a year.

    Returns
    -------
    Index
        An Index with boolean values.

    See Also
    --------
    is_year_end : Similar property indicating the last day of the year.

    Examples
    --------
    >>> idx = ks.from_pandas(pd.date_range("2017-12-30", periods=3))
    >>> idx.is_year_start
    Index([False, False, True], dtype='object')
    """
    series = self.to_series()
    return Index(series.dt.is_year_start)

@property
def is_year_end(self) -> Index:
    """
    Indicate whether each date is the last day of the year.

    Returns
    -------
    Index
        An Index with boolean values.

    See Also
    --------
    is_year_start : Similar property indicating the start of the year.

    Examples
    --------
    >>> idx = ks.from_pandas(pd.date_range("2017-12-30", periods=3))
    >>> idx.is_year_end
    Index([False, True, False], dtype='object')
    """
    series = self.to_series()
    return Index(series.dt.is_year_end)

@property
def is_leap_year(self) -> Index:
    """
    Boolean indicator of whether each date belongs to a leap year.

    A leap year has 366 days (instead of 365), with the 29th of February
    as the intercalary day. Leap years are the multiples of four, except
    for years divisible by 100 but not by 400.

    Returns
    -------
    Index
        Booleans indicating if dates belong to a leap year.

    Examples
    --------
    >>> idx = ks.from_pandas(pd.date_range("2012-01-01", "2015-01-01", freq="Y"))
    >>> idx.is_leap_year
    Index([True, False, False], dtype='object')
    """
    series = self.to_series()
    return Index(series.dt.is_leap_year)

@property
def daysinmonth(self) -> Index:
    """
    The number of days in the month of each date.

    Returns
    -------
    Index
        Built from the ``dt.daysinmonth`` values of ``self.to_series()``.
    """
    series = self.to_series()
    return Index(series.dt.daysinmonth)

@property
def days_in_month(self) -> Index:
    # Reads ``dt.days_in_month`` directly; the docstring is copied from
    # ``daysinmonth`` below.
    series = self.to_series()
    return Index(series.dt.days_in_month)

days_in_month.__doc__ = daysinmonth.__doc__
22 changes: 0 additions & 22 deletions databricks/koalas/missing/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,35 +100,13 @@ class MissingPandasLikeIndex(object):
class MissingPandasLikeDatetimeIndex(MissingPandasLikeIndex):

# Properties
year = _unsupported_property("year", cls="DatetimeIndex")
month = _unsupported_property("month", cls="DatetimeIndex")
day = _unsupported_property("day", cls="DatetimeIndex")
hour = _unsupported_property("hour", cls="DatetimeIndex")
minute = _unsupported_property("minute", cls="DatetimeIndex")
second = _unsupported_property("second", cls="DatetimeIndex")
microsecond = _unsupported_property("microsecond", cls="DatetimeIndex")
nanosecond = _unsupported_property("nanosecond", cls="DatetimeIndex")
Copy link
Contributor Author

@xinrong-meng xinrong-meng Mar 2, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Properties that are still missing will be implemented in follow-up PRs.
They are not yet implemented among the datetime-like properties of Series values either.

date = _unsupported_property("date", cls="DatetimeIndex")
time = _unsupported_property("time", cls="DatetimeIndex")
timetz = _unsupported_property("timetz", cls="DatetimeIndex")
dayofyear = _unsupported_property("dayofyear", cls="DatetimeIndex")
day_of_year = _unsupported_property("day_of_year", cls="DatetimeIndex")
weekofyear = _unsupported_property("weekofyear", cls="DatetimeIndex")
week = _unsupported_property("week", cls="DatetimeIndex")
dayofweek = _unsupported_property("dayofweek", cls="DatetimeIndex")
day_of_week = _unsupported_property("day_of_week", cls="DatetimeIndex")
weekday = _unsupported_property("weekday", cls="DatetimeIndex")
quarter = _unsupported_property("quarter", cls="DatetimeIndex")
tz = _unsupported_property("tz", cls="DatetimeIndex")
freq = _unsupported_property("freq", cls="DatetimeIndex")
freqstr = _unsupported_property("freqstr", cls="DatetimeIndex")
is_month_start = _unsupported_property("is_month_start", cls="DatetimeIndex")
is_month_end = _unsupported_property("is_month_end", cls="DatetimeIndex")
is_quarter_start = _unsupported_property("is_quarter_start", cls="DatetimeIndex")
is_quarter_end = _unsupported_property("is_quarter_end", cls="DatetimeIndex")
is_year_start = _unsupported_property("is_year_start", cls="DatetimeIndex")
is_year_end = _unsupported_property("is_year_end", cls="DatetimeIndex")
is_leap_year = _unsupported_property("is_leap_year", cls="DatetimeIndex")
inferred_freq = _unsupported_property("inferred_freq", cls="DatetimeIndex")

# Functions
Expand Down
38 changes: 38 additions & 0 deletions databricks/koalas/tests/test_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,44 @@ def test_index_unique(self):
with self.assertRaisesRegex(KeyError, "Requested level (hi)*"):
kidx.unique(level="hi")

def test_datetime_index_properties(self):
    """
    Compare DatetimeIndex properties between Koalas and pandas.

    Each pandas index is converted with ``ks.from_pandas`` and every
    property is checked with ``self.assert_eq`` against the pandas value.
    """
    explicit_indexes = [
        pd.DatetimeIndex([0]),
        pd.DatetimeIndex(["2004-01-01", "2002-12-31", "2000-04-01"]),
    ]
    # NOTE(review): "m" may resolve to a month-based frequency rather than
    # minutes in pandas -- confirm which one is intended.
    ranged_indexes = [
        pd.date_range("2000-01-01", periods=3, freq=unit)
        for unit in ["ns", "us", "ms", "s", "m", "h", "D"]
    ]

    # Properties whose pandas value is compared as-is.
    direct_properties = (
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "microsecond",
        "week",
        "weekofyear",
        "dayofweek",
        "weekday",
        "dayofyear",
        "quarter",
        "daysinmonth",
        "days_in_month",
    )
    # Properties whose pandas value is wrapped in ``ks.Index`` before comparing.
    wrapped_properties = (
        "is_month_start",
        "is_month_end",
        "is_quarter_start",
        "is_quarter_end",
        "is_year_start",
        "is_year_end",
        "is_leap_year",
    )
    # Properties only compared when pandas is at least 1.2.0.
    versioned_properties = ("day_of_year", "day_of_week")

    for pid in explicit_indexes + ranged_indexes:
        kid = ks.from_pandas(pid)

        for name in direct_properties:
            self.assert_eq(getattr(kid, name), getattr(pid, name))

        for name in wrapped_properties:
            self.assert_eq(getattr(kid, name), ks.Index(getattr(pid, name)))

        if LooseVersion(pd.__version__) >= LooseVersion("1.2.0"):
            for name in versioned_properties:
                self.assert_eq(getattr(kid, name), getattr(pid, name))

def test_multi_index_copy(self):
arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
idx = pd.MultiIndex.from_arrays(arrays, names=("number", "color"))
Expand Down