From a2cf9e9a3245cc1fbe068a5518be396783ca4e89 Mon Sep 17 00:00:00 2001 From: Oliver Newland Date: Wed, 13 Jul 2022 14:50:05 -0700 Subject: [PATCH 1/5] add enum-based granularity processor --- snuba/datasets/entities/generic_metrics.py | 3 +- .../query/processors/granularity_processor.py | 34 +++++++++++++++++++ tests/test_generic_metrics_api.py | 10 +++--- 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/snuba/datasets/entities/generic_metrics.py b/snuba/datasets/entities/generic_metrics.py index bd7687578c..f8930515c5 100644 --- a/snuba/datasets/entities/generic_metrics.py +++ b/snuba/datasets/entities/generic_metrics.py @@ -30,6 +30,7 @@ ) from snuba.pipeline.simple_pipeline import SimplePipelineBuilder from snuba.query.processors import QueryProcessor +from snuba.query.processors.granularity_processor import MappedGranularityProcessor from snuba.query.validation.validators import ( EntityRequiredColumnValidator, QueryValidator, @@ -97,7 +98,7 @@ def __init__( ) def get_query_processors(self) -> Sequence[QueryProcessor]: - return [TagsTypeTransformer()] + return [TagsTypeTransformer(), MappedGranularityProcessor()] class GenericMetricsSetsEntity(GenericMetricsEntity): diff --git a/snuba/query/processors/granularity_processor.py b/snuba/query/processors/granularity_processor.py index cb945c4ae7..cb3aa9273c 100644 --- a/snuba/query/processors/granularity_processor.py +++ b/snuba/query/processors/granularity_processor.py @@ -39,3 +39,37 @@ def process_query(self, query: Query, query_settings: QuerySettings) -> None: Literal(None, granularity), ) ) + + +class MappedGranularityProcessor(QueryProcessor): + DEFAULT_GRANULARITY_ENUM = 1 + PERFORMANCE_GRANULARITIES = [(60, 1), (3600, 2), (86400, 3)] + """Use the granularity set on the query to filter on the granularity column""" + + def __get_granularity(self, query: Query) -> int: + """Find the best fitting granularity for this query""" + requested_granularity = query.get_granularity() + + if requested_granularity is None: + return self.DEFAULT_GRANULARITY_ENUM + elif requested_granularity > 0: + for (granularity, mapped_value) in reversed(self.PERFORMANCE_GRANULARITIES): + if ( + requested_granularity % granularity == 0 + or requested_granularity == mapped_value + ): + return mapped_value + + raise InvalidGranularityException( + f"Granularity must be multiple of one of {GRANULARITIES_AVAILABLE}" + ) + + def process_query(self, query: Query, query_settings: QuerySettings) -> None: + granularity = self.__get_granularity(query) + query.add_condition_to_ast( + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "granularity"), + Literal(None, granularity), + ) + ) diff --git a/tests/test_generic_metrics_api.py b/tests/test_generic_metrics_api.py index f8f9e099f3..64afb7269f 100644 --- a/tests/test_generic_metrics_api.py +++ b/tests/test_generic_metrics_api.py @@ -143,7 +143,7 @@ def test_retrieval_basic(self) -> None: AND metric_id = {self.metric_id} AND timestamp >= toDateTime('{self.start_time}') AND timestamp < toDateTime('{self.end_time}') - GRANULARITY 1 + GRANULARITY 60 """ response = self.app.post( SNQL_ROUTE, @@ -180,7 +180,7 @@ def test_raw_tags(self) -> None: AND tags_raw[{tag_key}] = '{value_as_string}' AND timestamp >= toDateTime('{self.start_time}') AND timestamp < toDateTime('{self.end_time}') - GRANULARITY 1 + GRANULARITY 60 """ response = self.app.post( SNQL_ROUTE, @@ -217,7 +217,7 @@ def test_indexed_tags(self) -> None: AND tags[{tag_key}] = {tag_idx_value} AND timestamp >= toDateTime('{self.start_time}') AND timestamp < toDateTime('{self.end_time}') - GRANULARITY 1 + GRANULARITY 60 """ response = self.app.post( SNQL_ROUTE, @@ -308,7 +308,7 @@ def test_retrieval_basic(self) -> None: AND metric_id = {self.metric_id} AND timestamp >= toDateTime('{self.start_time}') AND timestamp < toDateTime('{self.end_time}') - GRANULARITY 1 + GRANULARITY 60 """ response = self.app.post( SNQL_ROUTE, @@ -333,7 +333,7 @@ def test_retrieval_percentiles(self) -> None: AND metric_id = {self.metric_id} AND timestamp >= toDateTime('{self.start_time}') AND timestamp < toDateTime('{self.end_time}') - GRANULARITY 1 + GRANULARITY 60 """ response = self.app.post( SNQL_ROUTE, From 8f25780ab1305bd12bcc5937591124b0f84b51d2 Mon Sep 17 00:00:00 2001 From: Oliver Newland Date: Wed, 13 Jul 2022 14:53:05 -0700 Subject: [PATCH 2/5] add TimeSeriesProcessor --- snuba/datasets/entities/generic_metrics.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/snuba/datasets/entities/generic_metrics.py b/snuba/datasets/entities/generic_metrics.py index f8930515c5..6ae84407a6 100644 --- a/snuba/datasets/entities/generic_metrics.py +++ b/snuba/datasets/entities/generic_metrics.py @@ -31,6 +31,7 @@ from snuba.pipeline.simple_pipeline import SimplePipelineBuilder from snuba.query.processors import QueryProcessor from snuba.query.processors.granularity_processor import MappedGranularityProcessor +from snuba.query.processors.timeseries_processor import TimeSeriesProcessor from snuba.query.validation.validators import ( EntityRequiredColumnValidator, QueryValidator, @@ -98,7 +99,11 @@ def __init__( ) def get_query_processors(self) -> Sequence[QueryProcessor]: - return [TagsTypeTransformer(), MappedGranularityProcessor()] + return [ + TagsTypeTransformer(), + MappedGranularityProcessor(), + TimeSeriesProcessor({"bucketed_time": "timestamp"}, ("timestamp",)), + ] class GenericMetricsSetsEntity(GenericMetricsEntity): From ff9551e0c1712d61894bb775a8b62681d3ea4b2d Mon Sep 17 00:00:00 2001 From: Oliver Newland Date: Wed, 13 Jul 2022 15:24:49 -0700 Subject: [PATCH 3/5] added unit tests for mapped granularity processor --- snuba/datasets/entities/generic_metrics.py | 11 ++- .../query/processors/granularity_processor.py | 30 ++++++-- .../processors/test_granularity_processor.py | 77 ++++++++++++++++++- 3 files changed, 109 insertions(+), 9 deletions(-) diff --git a/snuba/datasets/entities/generic_metrics.py b/snuba/datasets/entities/generic_metrics.py index 6ae84407a6..661e7cc8bb 100644 --- a/snuba/datasets/entities/generic_metrics.py +++ b/snuba/datasets/entities/generic_metrics.py @@ -30,7 +30,11 @@ ) from snuba.pipeline.simple_pipeline import SimplePipelineBuilder from snuba.query.processors import QueryProcessor -from snuba.query.processors.granularity_processor import MappedGranularityProcessor +from snuba.query.processors.granularity_processor import ( + DEFAULT_MAPPED_GRANULARITY_ENUM, + PERFORMANCE_GRANULARITIES, + MappedGranularityProcessor, +) from snuba.query.processors.timeseries_processor import TimeSeriesProcessor from snuba.query.validation.validators import ( EntityRequiredColumnValidator, @@ -101,7 +105,10 @@ def __init__( def get_query_processors(self) -> Sequence[QueryProcessor]: return [ TagsTypeTransformer(), - MappedGranularityProcessor(), + MappedGranularityProcessor( + accepted_granularities=PERFORMANCE_GRANULARITIES, + default_granularity_enum=DEFAULT_MAPPED_GRANULARITY_ENUM, + ), TimeSeriesProcessor({"bucketed_time": "timestamp"}, ("timestamp",)), ] diff --git a/snuba/query/processors/granularity_processor.py b/snuba/query/processors/granularity_processor.py index cb3aa9273c..730ec1ddb8 100644 --- a/snuba/query/processors/granularity_processor.py +++ b/snuba/query/processors/granularity_processor.py @@ -1,3 +1,5 @@ +from typing import Sequence, Tuple + from snuba.datasets.metrics import DEFAULT_GRANULARITY from snuba.query.conditions import ConditionFunctions, binary_condition from snuba.query.exceptions import InvalidGranularityException @@ -41,19 +43,35 @@ def process_query(self, query: Query, query_settings: QuerySettings) -> None: ) +PERFORMANCE_GRANULARITIES = [(60, 1), (3600, 2), (86400, 3)] +DEFAULT_MAPPED_GRANULARITY_ENUM = 1 + + class MappedGranularityProcessor(QueryProcessor): - DEFAULT_GRANULARITY_ENUM = 1 - PERFORMANCE_GRANULARITIES = [(60, 1), (3600, 2), (86400, 3)] - """Use the granularity set on the query to filter on the granularity column""" + """ + Use the granularity set on the query to filter on the granularity column, + supporting generic-metrics style enum mapping + """ + + def __init__( + self, + accepted_granularities: Sequence[Tuple[int, int]], + default_granularity_enum: int, + ): + self._accepted_granularities = accepted_granularities + self._available_granularities_values = [ + x[0] for x in self._accepted_granularities + ] + self._default_granularity_enum = default_granularity_enum def __get_granularity(self, query: Query) -> int: """Find the best fitting granularity for this query""" requested_granularity = query.get_granularity() if requested_granularity is None: - return self.DEFAULT_GRANULARITY_ENUM + return self._default_granularity_enum elif requested_granularity > 0: - for (granularity, mapped_value) in reversed(self.PERFORMANCE_GRANULARITIES): + for (granularity, mapped_value) in reversed(self._accepted_granularities): if ( requested_granularity % granularity == 0 or requested_granularity == mapped_value @@ -61,7 +79,7 @@ def __get_granularity(self, query: Query) -> int: return mapped_value raise InvalidGranularityException( - f"Granularity must be multiple of one of {GRANULARITIES_AVAILABLE}" + f"Granularity must be multiple of one of {self._available_granularities_values}" ) def process_query(self, query: Query, query_settings: QuerySettings) -> None: diff --git a/tests/query/processors/test_granularity_processor.py b/tests/query/processors/test_granularity_processor.py index 607476ae34..f240d6a700 100644 --- a/tests/query/processors/test_granularity_processor.py +++ b/tests/query/processors/test_granularity_processor.py @@ -15,7 +15,12 @@ from snuba.query.exceptions import InvalidGranularityException from snuba.query.expressions import Column, Literal from snuba.query.logical import Query -from snuba.query.processors.granularity_processor import GranularityProcessor +from snuba.query.processors.granularity_processor import ( + DEFAULT_MAPPED_GRANULARITY_ENUM, + PERFORMANCE_GRANULARITIES, + GranularityProcessor, + MappedGranularityProcessor, +) from snuba.query.query_settings import HTTPQuerySettings @@ -83,3 +88,73 @@ def test_granularity_added( ), granularity=(requested_granularity), ) + + +@pytest.mark.parametrize( + "entity_key,column", + [ + (EntityKey.GENERIC_METRICS_DISTRIBUTIONS, "percentiles"), + (EntityKey.GENERIC_METRICS_SETS, "value"), + ], +) +@pytest.mark.parametrize( + "requested_granularity, query_granularity", + [ + (None, 1), + (10, None), + (60, 1), + (90, None), + (120, 1), + (60 * 60, 2), + (90 * 60, 1), + (120 * 60, 2), + (24 * 60 * 60, 3), + (32 * 60 * 60, 2), + (48 * 60 * 60, 3), + (13, None), + (0, None), + # valid enum values are pass-through + (2, 2), + ], +) +def test_granularity_enum_mapping( + entity_key: EntityKey, + column: str, + requested_granularity: Optional[int], + query_granularity: int, +) -> None: + query = Query( + QueryEntity(entity_key, ColumnSet([])), + selected_columns=[SelectedExpression(column, Column(None, None, column))], + condition=binary_condition( + ConditionFunctions.EQ, Column(None, None, "metric_id"), Literal(None, 123) + ), + granularity=(requested_granularity), + ) + + try: + MappedGranularityProcessor( + accepted_granularities=PERFORMANCE_GRANULARITIES, + default_granularity_enum=DEFAULT_MAPPED_GRANULARITY_ENUM, + ).process_query(query, HTTPQuerySettings()) + except InvalidGranularityException: + assert query_granularity is None + else: + assert query == Query( + QueryEntity(entity_key, ColumnSet([])), + selected_columns=[SelectedExpression(column, Column(None, None, column))], + condition=binary_condition( + BooleanFunctions.AND, + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "granularity"), + Literal(None, query_granularity), + ), + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "metric_id"), + Literal(None, 123), + ), + ), + granularity=(requested_granularity), + ) From b1a55286a1551084f10c9d0107d3891ecd4877e2 Mon Sep 17 00:00:00 2001 From: Oliver Newland Date: Wed, 13 Jul 2022 16:07:01 -0700 Subject: [PATCH 4/5] test that time bucketing actually occurs --- tests/test_generic_metrics_api.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/test_generic_metrics_api.py b/tests/test_generic_metrics_api.py index 64afb7269f..dd76f4dcaf 100644 --- a/tests/test_generic_metrics_api.py +++ b/tests/test_generic_metrics_api.py @@ -268,6 +268,9 @@ def setup_method(self, test_method: Any) -> None: self.end_time = ( self.base_time + timedelta(seconds=self.count) + timedelta(seconds=10) ) + self.hour_before_start_time = self.start_time - timedelta(hours=1) + self.hour_after_start_time = self.start_time + timedelta(hours=1) + self.generate_dists() def generate_dists(self) -> None: @@ -349,3 +352,30 @@ def test_retrieval_percentiles(self) -> None: assert aggregation["org_id"] == self.org_id assert aggregation["project_id"] == self.project_id assert aggregation["quants"] == [2.0, approx(4.0), approx(4.0), approx(4.0)] + + def test_arbitrary_granularity(self) -> None: + query_str = f"""MATCH (generic_metrics_distributions) + SELECT quantiles(0.5,0.9,0.95,0.99)(value) AS quants, min(bucketed_time) AS min_time + BY project_id, org_id + WHERE org_id = {self.org_id} + AND project_id = {self.project_id} + AND metric_id = {self.metric_id} + AND timestamp >= toDateTime('{self.hour_before_start_time}') + AND timestamp < toDateTime('{self.hour_after_start_time}') + GRANULARITY 3600 + """ + response = self.app.post( + SNQL_ROUTE, + data=json.dumps({"query": query_str, "dataset": "generic_metrics"}), + ) + data = json.loads(response.data) + + assert response.status_code == 200 + assert len(data["data"]) == 1, data + + aggregation = data["data"][0] + smallest_time_bucket = datetime.strptime( + aggregation["min_time"], "%Y-%m-%dT%H:%M:%S+00:00" + ) + assert smallest_time_bucket.hour == 12 + assert smallest_time_bucket.minute == 0 From 1b8f5ef9a66135106aa134dcb89517f702e15b4c Mon Sep 17 00:00:00 2001 From: Oliver Newland Date: Thu, 14 Jul 2022 09:48:20 -0700 Subject: [PATCH 5/5] addressed PR comments --- .../query/processors/granularity_processor.py | 30 ++++++++++++------- .../processors/test_granularity_processor.py | 2 -- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/snuba/query/processors/granularity_processor.py b/snuba/query/processors/granularity_processor.py index 730ec1ddb8..2ea6baf50e 100644 --- a/snuba/query/processors/granularity_processor.py +++ b/snuba/query/processors/granularity_processor.py @@ -1,4 +1,4 @@ -from typing import Sequence, Tuple +from typing import NamedTuple, Sequence from snuba.datasets.metrics import DEFAULT_GRANULARITY from snuba.query.conditions import ConditionFunctions, binary_condition @@ -43,7 +43,16 @@ def process_query(self, query: Query, query_settings: QuerySettings) -> None: ) -PERFORMANCE_GRANULARITIES = [(60, 1), (3600, 2), (86400, 3)] +class GranularityMapping(NamedTuple): + raw: int + enum_value: int + + +PERFORMANCE_GRANULARITIES: Sequence[GranularityMapping] = [ + GranularityMapping(60, 1), + GranularityMapping(3600, 2), + GranularityMapping(86400, 3), +] DEFAULT_MAPPED_GRANULARITY_ENUM = 1 @@ -55,12 +64,14 @@ class MappedGranularityProcessor(QueryProcessor): def __init__( self, - accepted_granularities: Sequence[Tuple[int, int]], + accepted_granularities: Sequence[GranularityMapping], default_granularity_enum: int, ): - self._accepted_granularities = accepted_granularities + self._accepted_granularities = sorted( + accepted_granularities, key=lambda mapping: mapping.raw, reverse=True + ) self._available_granularities_values = [ - x[0] for x in self._accepted_granularities + mapping.raw for mapping in self._accepted_granularities ] self._default_granularity_enum = default_granularity_enum @@ -71,12 +82,9 @@ def __get_granularity(self, query: Query) -> int: if requested_granularity is None: return self._default_granularity_enum elif requested_granularity > 0: - for (granularity, mapped_value) in reversed(self._accepted_granularities): - if ( - requested_granularity % granularity == 0 - or requested_granularity == mapped_value - ): - return mapped_value + for mapping in self._accepted_granularities: + if requested_granularity % mapping.raw == 0: + return mapping.enum_value raise InvalidGranularityException( f"Granularity must be multiple of one of {self._available_granularities_values}" diff --git a/tests/query/processors/test_granularity_processor.py b/tests/query/processors/test_granularity_processor.py index f240d6a700..67962d9162 100644 --- a/tests/query/processors/test_granularity_processor.py +++ b/tests/query/processors/test_granularity_processor.py @@ -113,8 +113,6 @@ def test_granularity_added( (48 * 60 * 60, 3), (13, None), (0, None), - # valid enum values are pass-through - (2, 2), ], ) def test_granularity_enum_mapping(