Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions superset/charts/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -749,6 +749,7 @@ class ChartDataPostProcessingOperationSchema(Schema):
"sort",
"diff",
"compare",
"resample",
)
),
example="aggregate",
Expand Down
26 changes: 26 additions & 0 deletions superset/utils/pandas_postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -916,3 +916,29 @@ def outliers(series: Series) -> Set[float]:
for metric in metrics
}
return aggregate(df, groupby=groupby, aggregates=aggregates)


def resample(
    df: DataFrame,
    resample_rule: str,
    resample_method: str,
    time_column: str,
    resample_fill_zero: bool = False,
) -> DataFrame:
    """
    Resample a timeseries dataframe.

    ``time_column`` is moved into the index, the frame is resampled at
    ``resample_rule``, the resampler method named by ``resample_method`` is
    called without arguments, and the index is turned back into a column.
    The input frame is not modified.

    :param df: DataFrame to resample.
    :param resample_rule: The offset string representing target conversion.
    :param resample_method: Name of the public pandas resampler method to
           apply, e.g. ``ffill`` or ``asfreq``.
    :param time_column: Existing column in ``df`` to resample on.
    :param resample_fill_zero: Fill the rows introduced by the resample with
           zero. Only honoured when ``resample_method`` is ``asfreq``; it is
           ignored for every other method.
    :return: DataFrame after resample
    :raises AttributeError: If ``resample_method`` starts with an underscore
            or is not an attribute of the resampler.
    :raises KeyError: If ``time_column`` is not a column of ``df``
            (raised by ``DataFrame.set_index``).
    """
    # The method name is resolved dynamically with getattr below, so refuse
    # underscore-prefixed names up front: they would reach private or dunder
    # attributes of the resampler rather than a resampling method.
    # NOTE(review): the previous docstring advertised
    # QueryObjectValidationError, but nothing in this function raised it;
    # convert this error if callers rely on that type.
    if isinstance(resample_method, str) and resample_method.startswith("_"):
        raise AttributeError(
            f"Invalid resample method: {resample_method!r}"
        )

    resampler = df.set_index(time_column).resample(resample_rule)
    if resample_method == "asfreq" and resample_fill_zero:
        # fill_value only applies to rows created by the resample.
        resampled = resampler.asfreq(fill_value=0)
    else:
        resampled = getattr(resampler, resample_method)()
    return resampled.reset_index()
26 changes: 26 additions & 0 deletions tests/integration_tests/pandas_postprocessing_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,3 +870,29 @@ def test_boxplot_percentile_incorrect_params(self):
metrics=["cars"],
percentiles=[10, 90, 10],
)

def test_resample(self):
    """Resample the shared timeseries fixture daily: ffill, then zero-filled asfreq."""
    source = timeseries_df.copy()
    source.index.name = "time_column"
    source.reset_index(inplace=True)

    # Arguments shared by both resample calls below.
    common = {
        "df": source,
        "resample_rule": "1D",
        "time_column": "time_column",
    }

    # Forward fill: the added rows repeat the preceding row's values.
    forward_filled = proc.resample(resample_method="ffill", **common)
    expected_labels = ["x", "y", "y", "y", "z", "z", "q"]
    expected_values = [1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 4.0]
    self.assertListEqual(forward_filled["label"].tolist(), expected_labels)
    self.assertListEqual(forward_filled["y"].tolist(), expected_values)

    # asfreq with zero fill: the added rows hold 0 in every column.
    zero_filled = proc.resample(
        resample_method="asfreq", resample_fill_zero=True, **common
    )
    expected_labels = ["x", "y", 0, 0, "z", 0, "q"]
    expected_values = [1.0, 2.0, 0, 0, 3.0, 0, 4.0]
    self.assertListEqual(zero_filled["label"].tolist(), expected_labels)
    self.assertListEqual(zero_filled["y"].tolist(), expected_values)