# Resampler methods that callers may select through ``method``. The name is
# dispatched dynamically, so it is restricted to known fill/aggregation
# methods that can be called without arguments; anything else (private or
# dunder attributes, properties, methods needing arguments) is rejected
# up front instead of being looked up on the Resampler object.
_RESAMPLE_METHODS = (
    "asfreq",
    "backfill",
    "bfill",
    "count",
    "ffill",
    "first",
    "interpolate",
    "last",
    "max",
    "mean",
    "median",
    "min",
    "nearest",
    "nunique",
    "ohlc",
    "pad",
    "prod",
    "sem",
    "size",
    "std",
    "sum",
    "var",
)


def resample(
    df: DataFrame,
    rule: str,
    method: str,
    time_column: str,
    fill_value: Optional[Union[float, int]] = None,
) -> DataFrame:
    """
    Resample a timeseries dataframe.

    The input frame is not modified; ``time_column`` is used as the index for
    the resample and is restored as a regular column in the result.

    :param df: DataFrame to resample.
    :param rule: The offset string representing the target frequency,
           e.g. ``"1D"``.
    :param method: Name of the resampler method used to produce the values of
           the resampled frame, e.g. ``"asfreq"``, ``"ffill"`` or ``"mean"``.
           Must be one of ``_RESAMPLE_METHODS``.
    :param time_column: Name of an existing column in ``df`` holding the
           timestamps to resample on.
    :param fill_value: Value used for the rows introduced by upsampling. Only
           applied when ``method`` is ``"asfreq"``; ignored for every other
           method.
    :return: DataFrame after resample
    :raises QueryObjectValidationError: If the request is incorrect, i.e.
            ``time_column`` is not a column of ``df`` or ``method`` is not a
            supported resample method.
    """
    # NOTE(review): QueryObjectValidationError is assumed to be imported at
    # module level, as the documented contract of this function implies.
    if time_column not in df.columns:
        raise QueryObjectValidationError(
            f"Referenced column not available in DataFrame: {time_column}"
        )
    if method not in _RESAMPLE_METHODS:
        raise QueryObjectValidationError(f"Invalid resample method: {method}")

    resampler = df.set_index(time_column).resample(rule)
    if method == "asfreq" and fill_value is not None:
        # asfreq is the only supported method that accepts a fill value.
        resampled = resampler.asfreq(fill_value=fill_value)
    else:
        resampled = getattr(resampler, method)()
    return resampled.reset_index()