From a2cf9e9a3245cc1fbe068a5518be396783ca4e89 Mon Sep 17 00:00:00 2001
From: Oliver Newland <oliver.newland@sentry.io>
Date: Wed, 13 Jul 2022 14:50:05 -0700
Subject: [PATCH 1/5] add enum-based granularity processor

---
 snuba/datasets/entities/generic_metrics.py    |  3 +-
 .../query/processors/granularity_processor.py | 34 +++++++++++++++++++
 tests/test_generic_metrics_api.py             | 10 +++---
 3 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/snuba/datasets/entities/generic_metrics.py b/snuba/datasets/entities/generic_metrics.py
index bd7687578c..f8930515c5 100644
--- a/snuba/datasets/entities/generic_metrics.py
+++ b/snuba/datasets/entities/generic_metrics.py
@@ -30,6 +30,7 @@
 )
 from snuba.pipeline.simple_pipeline import SimplePipelineBuilder
 from snuba.query.processors import QueryProcessor
+from snuba.query.processors.granularity_processor import MappedGranularityProcessor
 from snuba.query.validation.validators import (
     EntityRequiredColumnValidator,
     QueryValidator,
@@ -97,7 +98,7 @@ def __init__(
         )
 
     def get_query_processors(self) -> Sequence[QueryProcessor]:
-        return [TagsTypeTransformer()]
+        return [TagsTypeTransformer(), MappedGranularityProcessor()]
 
 
 class GenericMetricsSetsEntity(GenericMetricsEntity):
diff --git a/snuba/query/processors/granularity_processor.py b/snuba/query/processors/granularity_processor.py
index cb945c4ae7..cb3aa9273c 100644
--- a/snuba/query/processors/granularity_processor.py
+++ b/snuba/query/processors/granularity_processor.py
@@ -39,3 +39,37 @@ def process_query(self, query: Query, query_settings: QuerySettings) -> None:
                 Literal(None, granularity),
             )
         )
+
+
+class MappedGranularityProcessor(QueryProcessor):
+    DEFAULT_GRANULARITY_ENUM = 1
+    PERFORMANCE_GRANULARITIES = [(60, 1), (3600, 2), (86400, 3)]
+    """Use the granularity set on the query to filter on the granularity column"""
+
+    def __get_granularity(self, query: Query) -> int:
+        """Find the best fitting granularity for this query"""
+        requested_granularity = query.get_granularity()
+
+        if requested_granularity is None:
+            return self.DEFAULT_GRANULARITY_ENUM
+        elif requested_granularity > 0:
+            for (granularity, mapped_value) in reversed(self.PERFORMANCE_GRANULARITIES):
+                if (
+                    requested_granularity % granularity == 0
+                    or requested_granularity == mapped_value
+                ):
+                    return mapped_value
+
+        raise InvalidGranularityException(
+            f"Granularity must be multiple of one of {GRANULARITIES_AVAILABLE}"
+        )
+
+    def process_query(self, query: Query, query_settings: QuerySettings) -> None:
+        granularity = self.__get_granularity(query)
+        query.add_condition_to_ast(
+            binary_condition(
+                ConditionFunctions.EQ,
+                Column(None, None, "granularity"),
+                Literal(None, granularity),
+            )
+        )
diff --git a/tests/test_generic_metrics_api.py b/tests/test_generic_metrics_api.py
index f8f9e099f3..64afb7269f 100644
--- a/tests/test_generic_metrics_api.py
+++ b/tests/test_generic_metrics_api.py
@@ -143,7 +143,7 @@ def test_retrieval_basic(self) -> None:
                     AND metric_id = {self.metric_id}
                     AND timestamp >= toDateTime('{self.start_time}')
                     AND timestamp < toDateTime('{self.end_time}')
-                    GRANULARITY 1
+                    GRANULARITY 60
                     """
         response = self.app.post(
             SNQL_ROUTE,
@@ -180,7 +180,7 @@ def test_raw_tags(self) -> None:
                     AND tags_raw[{tag_key}] = '{value_as_string}'
                     AND timestamp >= toDateTime('{self.start_time}')
                     AND timestamp < toDateTime('{self.end_time}')
-                    GRANULARITY 1
+                    GRANULARITY 60
                     """
         response = self.app.post(
             SNQL_ROUTE,
@@ -217,7 +217,7 @@ def test_indexed_tags(self) -> None:
                     AND tags[{tag_key}] = {tag_idx_value}
                     AND timestamp >= toDateTime('{self.start_time}')
                     AND timestamp < toDateTime('{self.end_time}')
-                    GRANULARITY 1
+                    GRANULARITY 60
                     """
         response = self.app.post(
             SNQL_ROUTE,
@@ -308,7 +308,7 @@ def test_retrieval_basic(self) -> None:
                     AND metric_id = {self.metric_id}
                     AND timestamp >= toDateTime('{self.start_time}')
                     AND timestamp < toDateTime('{self.end_time}')
-                    GRANULARITY 1
+                    GRANULARITY 60
                     """
         response = self.app.post(
             SNQL_ROUTE,
@@ -333,7 +333,7 @@ def test_retrieval_percentiles(self) -> None:
                     AND metric_id = {self.metric_id}
                     AND timestamp >= toDateTime('{self.start_time}')
                     AND timestamp < toDateTime('{self.end_time}')
-                    GRANULARITY 1
+                    GRANULARITY 60
                     """
         response = self.app.post(
             SNQL_ROUTE,

From 8f25780ab1305bd12bcc5937591124b0f84b51d2 Mon Sep 17 00:00:00 2001
From: Oliver Newland <oliver.newland@sentry.io>
Date: Wed, 13 Jul 2022 14:53:05 -0700
Subject: [PATCH 2/5] add TimeSeriesProcessor

---
 snuba/datasets/entities/generic_metrics.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/snuba/datasets/entities/generic_metrics.py b/snuba/datasets/entities/generic_metrics.py
index f8930515c5..6ae84407a6 100644
--- a/snuba/datasets/entities/generic_metrics.py
+++ b/snuba/datasets/entities/generic_metrics.py
@@ -31,6 +31,7 @@
 from snuba.pipeline.simple_pipeline import SimplePipelineBuilder
 from snuba.query.processors import QueryProcessor
 from snuba.query.processors.granularity_processor import MappedGranularityProcessor
+from snuba.query.processors.timeseries_processor import TimeSeriesProcessor
 from snuba.query.validation.validators import (
     EntityRequiredColumnValidator,
     QueryValidator,
@@ -98,7 +99,11 @@ def __init__(
         )
 
     def get_query_processors(self) -> Sequence[QueryProcessor]:
-        return [TagsTypeTransformer(), MappedGranularityProcessor()]
+        return [
+            TagsTypeTransformer(),
+            MappedGranularityProcessor(),
+            TimeSeriesProcessor({"bucketed_time": "timestamp"}, ("timestamp",)),
+        ]
 
 
 class GenericMetricsSetsEntity(GenericMetricsEntity):

From ff9551e0c1712d61894bb775a8b62681d3ea4b2d Mon Sep 17 00:00:00 2001
From: Oliver Newland <oliver.newland@sentry.io>
Date: Wed, 13 Jul 2022 15:24:49 -0700
Subject: [PATCH 3/5] added unit tests for mapped granularity processor

---
 snuba/datasets/entities/generic_metrics.py    | 11 ++-
 .../query/processors/granularity_processor.py | 30 ++++++--
 .../processors/test_granularity_processor.py  | 77 ++++++++++++++++++-
 3 files changed, 109 insertions(+), 9 deletions(-)

diff --git a/snuba/datasets/entities/generic_metrics.py b/snuba/datasets/entities/generic_metrics.py
index 6ae84407a6..661e7cc8bb 100644
--- a/snuba/datasets/entities/generic_metrics.py
+++ b/snuba/datasets/entities/generic_metrics.py
@@ -30,7 +30,11 @@
 )
 from snuba.pipeline.simple_pipeline import SimplePipelineBuilder
 from snuba.query.processors import QueryProcessor
-from snuba.query.processors.granularity_processor import MappedGranularityProcessor
+from snuba.query.processors.granularity_processor import (
+    DEFAULT_MAPPED_GRANULARITY_ENUM,
+    PERFORMANCE_GRANULARITIES,
+    MappedGranularityProcessor,
+)
 from snuba.query.processors.timeseries_processor import TimeSeriesProcessor
 from snuba.query.validation.validators import (
     EntityRequiredColumnValidator,
@@ -101,7 +105,10 @@ def __init__(
     def get_query_processors(self) -> Sequence[QueryProcessor]:
         return [
             TagsTypeTransformer(),
-            MappedGranularityProcessor(),
+            MappedGranularityProcessor(
+                accepted_granularities=PERFORMANCE_GRANULARITIES,
+                default_granularity_enum=DEFAULT_MAPPED_GRANULARITY_ENUM,
+            ),
             TimeSeriesProcessor({"bucketed_time": "timestamp"}, ("timestamp",)),
         ]
 
diff --git a/snuba/query/processors/granularity_processor.py b/snuba/query/processors/granularity_processor.py
index cb3aa9273c..730ec1ddb8 100644
--- a/snuba/query/processors/granularity_processor.py
+++ b/snuba/query/processors/granularity_processor.py
@@ -1,3 +1,5 @@
+from typing import Sequence, Tuple
+
 from snuba.datasets.metrics import DEFAULT_GRANULARITY
 from snuba.query.conditions import ConditionFunctions, binary_condition
 from snuba.query.exceptions import InvalidGranularityException
@@ -41,19 +43,35 @@ def process_query(self, query: Query, query_settings: QuerySettings) -> None:
         )
 
 
+PERFORMANCE_GRANULARITIES = [(60, 1), (3600, 2), (86400, 3)]
+DEFAULT_MAPPED_GRANULARITY_ENUM = 1
+
+
 class MappedGranularityProcessor(QueryProcessor):
-    DEFAULT_GRANULARITY_ENUM = 1
-    PERFORMANCE_GRANULARITIES = [(60, 1), (3600, 2), (86400, 3)]
-    """Use the granularity set on the query to filter on the granularity column"""
+    """
+    Use the granularity set on the query to filter on the granularity column,
+    supporting generic-metrics style enum mapping
+    """
+
+    def __init__(
+        self,
+        accepted_granularities: Sequence[Tuple[int, int]],
+        default_granularity_enum: int,
+    ):
+        self._accepted_granularities = accepted_granularities
+        self._available_granularities_values = [
+            x[0] for x in self._accepted_granularities
+        ]
+        self._default_granularity_enum = default_granularity_enum
 
     def __get_granularity(self, query: Query) -> int:
         """Find the best fitting granularity for this query"""
         requested_granularity = query.get_granularity()
 
         if requested_granularity is None:
-            return self.DEFAULT_GRANULARITY_ENUM
+            return self._default_granularity_enum
         elif requested_granularity > 0:
-            for (granularity, mapped_value) in reversed(self.PERFORMANCE_GRANULARITIES):
+            for (granularity, mapped_value) in reversed(self._accepted_granularities):
                 if (
                     requested_granularity % granularity == 0
                     or requested_granularity == mapped_value
@@ -61,7 +79,7 @@ def __get_granularity(self, query: Query) -> int:
                     return mapped_value
 
         raise InvalidGranularityException(
-            f"Granularity must be multiple of one of {GRANULARITIES_AVAILABLE}"
+            f"Granularity must be multiple of one of {self._available_granularities_values}"
         )
 
     def process_query(self, query: Query, query_settings: QuerySettings) -> None:
diff --git a/tests/query/processors/test_granularity_processor.py b/tests/query/processors/test_granularity_processor.py
index 607476ae34..f240d6a700 100644
--- a/tests/query/processors/test_granularity_processor.py
+++ b/tests/query/processors/test_granularity_processor.py
@@ -15,7 +15,12 @@
 from snuba.query.exceptions import InvalidGranularityException
 from snuba.query.expressions import Column, Literal
 from snuba.query.logical import Query
-from snuba.query.processors.granularity_processor import GranularityProcessor
+from snuba.query.processors.granularity_processor import (
+    DEFAULT_MAPPED_GRANULARITY_ENUM,
+    PERFORMANCE_GRANULARITIES,
+    GranularityProcessor,
+    MappedGranularityProcessor,
+)
 from snuba.query.query_settings import HTTPQuerySettings
 
 
@@ -83,3 +88,73 @@ def test_granularity_added(
             ),
             granularity=(requested_granularity),
         )
+
+
+@pytest.mark.parametrize(
+    "entity_key,column",
+    [
+        (EntityKey.GENERIC_METRICS_DISTRIBUTIONS, "percentiles"),
+        (EntityKey.GENERIC_METRICS_SETS, "value"),
+    ],
+)
+@pytest.mark.parametrize(
+    "requested_granularity, query_granularity",
+    [
+        (None, 1),
+        (10, None),
+        (60, 1),
+        (90, None),
+        (120, 1),
+        (60 * 60, 2),
+        (90 * 60, 1),
+        (120 * 60, 2),
+        (24 * 60 * 60, 3),
+        (32 * 60 * 60, 2),
+        (48 * 60 * 60, 3),
+        (13, None),
+        (0, None),
+        # valid enum values are pass-through
+        (2, 2),
+    ],
+)
+def test_granularity_enum_mapping(
+    entity_key: EntityKey,
+    column: str,
+    requested_granularity: Optional[int],
+    query_granularity: int,
+) -> None:
+    query = Query(
+        QueryEntity(entity_key, ColumnSet([])),
+        selected_columns=[SelectedExpression(column, Column(None, None, column))],
+        condition=binary_condition(
+            ConditionFunctions.EQ, Column(None, None, "metric_id"), Literal(None, 123)
+        ),
+        granularity=(requested_granularity),
+    )
+
+    try:
+        MappedGranularityProcessor(
+            accepted_granularities=PERFORMANCE_GRANULARITIES,
+            default_granularity_enum=DEFAULT_MAPPED_GRANULARITY_ENUM,
+        ).process_query(query, HTTPQuerySettings())
+    except InvalidGranularityException:
+        assert query_granularity is None
+    else:
+        assert query == Query(
+            QueryEntity(entity_key, ColumnSet([])),
+            selected_columns=[SelectedExpression(column, Column(None, None, column))],
+            condition=binary_condition(
+                BooleanFunctions.AND,
+                binary_condition(
+                    ConditionFunctions.EQ,
+                    Column(None, None, "granularity"),
+                    Literal(None, query_granularity),
+                ),
+                binary_condition(
+                    ConditionFunctions.EQ,
+                    Column(None, None, "metric_id"),
+                    Literal(None, 123),
+                ),
+            ),
+            granularity=(requested_granularity),
+        )

From b1a55286a1551084f10c9d0107d3891ecd4877e2 Mon Sep 17 00:00:00 2001
From: Oliver Newland <oliver.newland@sentry.io>
Date: Wed, 13 Jul 2022 16:07:01 -0700
Subject: [PATCH 4/5] test that time bucketing actually occurs

---
 tests/test_generic_metrics_api.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/tests/test_generic_metrics_api.py b/tests/test_generic_metrics_api.py
index 64afb7269f..dd76f4dcaf 100644
--- a/tests/test_generic_metrics_api.py
+++ b/tests/test_generic_metrics_api.py
@@ -268,6 +268,9 @@ def setup_method(self, test_method: Any) -> None:
         self.end_time = (
             self.base_time + timedelta(seconds=self.count) + timedelta(seconds=10)
         )
+        self.hour_before_start_time = self.start_time - timedelta(hours=1)
+        self.hour_after_start_time = self.start_time + timedelta(hours=1)
+
         self.generate_dists()
 
     def generate_dists(self) -> None:
@@ -349,3 +352,30 @@ def test_retrieval_percentiles(self) -> None:
         assert aggregation["org_id"] == self.org_id
         assert aggregation["project_id"] == self.project_id
         assert aggregation["quants"] == [2.0, approx(4.0), approx(4.0), approx(4.0)]
+
+    def test_arbitrary_granularity(self) -> None:
+        query_str = f"""MATCH (generic_metrics_distributions)
+                    SELECT quantiles(0.5,0.9,0.95,0.99)(value) AS quants, min(bucketed_time) AS min_time
+                    BY project_id, org_id
+                    WHERE org_id = {self.org_id}
+                    AND project_id = {self.project_id}
+                    AND metric_id = {self.metric_id}
+                    AND timestamp >= toDateTime('{self.hour_before_start_time}')
+                    AND timestamp < toDateTime('{self.hour_after_start_time}')
+                    GRANULARITY 3600
+                    """
+        response = self.app.post(
+            SNQL_ROUTE,
+            data=json.dumps({"query": query_str, "dataset": "generic_metrics"}),
+        )
+        data = json.loads(response.data)
+
+        assert response.status_code == 200
+        assert len(data["data"]) == 1, data
+
+        aggregation = data["data"][0]
+        smallest_time_bucket = datetime.strptime(
+            aggregation["min_time"], "%Y-%m-%dT%H:%M:%S+00:00"
+        )
+        assert smallest_time_bucket.hour == 12
+        assert smallest_time_bucket.minute == 0

From 1b8f5ef9a66135106aa134dcb89517f702e15b4c Mon Sep 17 00:00:00 2001
From: Oliver Newland <oliver.newland@sentry.io>
Date: Thu, 14 Jul 2022 09:48:20 -0700
Subject: [PATCH 5/5] addressed PR comments

---
 .../query/processors/granularity_processor.py | 30 ++++++++++++-------
 .../processors/test_granularity_processor.py  |  2 --
 2 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/snuba/query/processors/granularity_processor.py b/snuba/query/processors/granularity_processor.py
index 730ec1ddb8..2ea6baf50e 100644
--- a/snuba/query/processors/granularity_processor.py
+++ b/snuba/query/processors/granularity_processor.py
@@ -1,4 +1,4 @@
-from typing import Sequence, Tuple
+from typing import NamedTuple, Sequence
 
 from snuba.datasets.metrics import DEFAULT_GRANULARITY
 from snuba.query.conditions import ConditionFunctions, binary_condition
@@ -43,7 +43,16 @@ def process_query(self, query: Query, query_settings: QuerySettings) -> None:
         )
 
 
-PERFORMANCE_GRANULARITIES = [(60, 1), (3600, 2), (86400, 3)]
+class GranularityMapping(NamedTuple):
+    raw: int
+    enum_value: int
+
+
+PERFORMANCE_GRANULARITIES: Sequence[GranularityMapping] = [
+    GranularityMapping(60, 1),
+    GranularityMapping(3600, 2),
+    GranularityMapping(86400, 3),
+]
 DEFAULT_MAPPED_GRANULARITY_ENUM = 1
 
 
@@ -55,12 +64,14 @@ class MappedGranularityProcessor(QueryProcessor):
 
     def __init__(
         self,
-        accepted_granularities: Sequence[Tuple[int, int]],
+        accepted_granularities: Sequence[GranularityMapping],
         default_granularity_enum: int,
     ):
-        self._accepted_granularities = accepted_granularities
+        self._accepted_granularities = sorted(
+            accepted_granularities, key=lambda mapping: mapping.raw, reverse=True
+        )
         self._available_granularities_values = [
-            x[0] for x in self._accepted_granularities
+            mapping.raw for mapping in self._accepted_granularities
         ]
         self._default_granularity_enum = default_granularity_enum
 
@@ -71,12 +82,9 @@ def __get_granularity(self, query: Query) -> int:
         if requested_granularity is None:
             return self._default_granularity_enum
         elif requested_granularity > 0:
-            for (granularity, mapped_value) in reversed(self._accepted_granularities):
-                if (
-                    requested_granularity % granularity == 0
-                    or requested_granularity == mapped_value
-                ):
-                    return mapped_value
+            for mapping in self._accepted_granularities:
+                if requested_granularity % mapping.raw == 0:
+                    return mapping.enum_value
 
         raise InvalidGranularityException(
             f"Granularity must be multiple of one of {self._available_granularities_values}"
diff --git a/tests/query/processors/test_granularity_processor.py b/tests/query/processors/test_granularity_processor.py
index f240d6a700..67962d9162 100644
--- a/tests/query/processors/test_granularity_processor.py
+++ b/tests/query/processors/test_granularity_processor.py
@@ -113,8 +113,6 @@ def test_granularity_added(
         (48 * 60 * 60, 3),
         (13, None),
         (0, None),
-        # valid enum values are pass-through
-        (2, 2),
     ],
 )
 def test_granularity_enum_mapping(