|
49 | 49 | import pandas as pd
|
50 | 50 | from pandas.api.types import is_list_like, is_dict_like, is_scalar
|
51 | 51 | from pandas.api.extensions import ExtensionDtype
|
| 52 | +from pandas.tseries.frequencies import DateOffset, to_offset |
52 | 53 |
|
53 | 54 | if TYPE_CHECKING:
|
54 | 55 | from pandas.io.formats.style import Styler
|
@@ -5670,6 +5671,63 @@ def head(self, n: int = 5) -> "DataFrame":
|
5670 | 5671 | sdf = sdf.orderBy(NATURAL_ORDER_COLUMN_NAME)
|
5671 | 5672 | return DataFrame(self._internal.with_new_sdf(sdf.limit(n)))
|
5672 | 5673 |
|
def last(self, offset: Union[str, DateOffset]) -> "DataFrame":
    """
    Return the trailing rows of a time-indexed DataFrame that fall within
    a date offset of its latest timestamp.

    The window is anchored at the maximum value of the index: the offset
    is subtracted from that maximum and the rows from the resulting date
    onwards are selected.

    Parameters
    ----------
    offset : str or DateOffset
        Length of the trailing window. For instance, '3D' selects the rows
        whose index lies within the last 3 days.

    Returns
    -------
    DataFrame
        A subset of the caller.

    Raises
    ------
    TypeError
        If the index is not a :class:`DatetimeIndex`

    Examples
    --------

    >>> index = pd.date_range('2018-04-09', periods=4, freq='2D')
    >>> kdf = ks.DataFrame({'A': [1, 2, 3, 4]}, index=index)
    >>> kdf
                A
    2018-04-09  1
    2018-04-11  2
    2018-04-13  3
    2018-04-15  4

    Get the rows for the last 3 days:

    >>> kdf.last('3D')
                A
    2018-04-13  3
    2018-04-15  4

    Note that the rows of the last 3 calendar days are returned, not the
    last 3 observed days in the dataset, which is why the row for
    2018-04-11 is absent.
    """
    # Function-scope import kept as in the original (presumably to avoid a
    # circular import between the frame and index modules; verify).
    from databricks.koalas.indexes import DatetimeIndex

    if isinstance(self.index, DatetimeIndex):
        # Normalise the str / DateOffset argument first, then anchor the
        # window at the most recent timestamp present in the index.
        span = to_offset(offset)
        window_start = self.index.max() - span

        # NOTE(review): if this label slice includes its start label, a row
        # stamped exactly at `window_start` is kept; pandas' own `last`
        # appears to exclude that boundary row. Confirm whether parity with
        # pandas is intended here.
        return cast(DataFrame, self.loc[window_start:])

    raise TypeError("'last' only supports a DatetimeIndex")
| 5730 | + |
5673 | 5731 | def pivot_table(
|
5674 | 5732 | self, values=None, index=None, columns=None, aggfunc="mean", fill_value=None
|
5675 | 5733 | ) -> "DataFrame":
|
|
0 commit comments