From c4a80c66097f19f22ca7dce46f98827d1d71f102 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Fri, 24 May 2024 14:34:57 +0200 Subject: [PATCH 1/7] feat(metrics): add wildcard support to MQL parser --- snuba/query/mql/parser.py | 62 ++++++++++++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 7 deletions(-) diff --git a/snuba/query/mql/parser.py b/snuba/query/mql/parser.py index e141e27edb..e3021f0f0c 100644 --- a/snuba/query/mql/parser.py +++ b/snuba/query/mql/parser.py @@ -341,15 +341,35 @@ def visit_filter_factor( if isinstance(factor, FunctionCall): # If we have a parenthesized expression, we just return it. return factor + condition_op, lhs, _, _, _, rhs = factor condition_op_value = ( "!" if len(condition_op) == 1 and condition_op[0] == "!" else "" ) + + contains_wildcard, rhs = rhs + + if contains_wildcard: + if not condition_op_value: + op = ConditionFunctions.LIKE + elif condition_op_value == "!": + op = ConditionFunctions.NOT_LIKE + + return FunctionCall( + None, + op, + ( + Column(None, None, lhs[0]), + Literal(None, rhs), + ), + ) + if isinstance(rhs, list): if not condition_op_value: op = ConditionFunctions.IN elif condition_op_value == "!": op = ConditionFunctions.NOT_IN + return FunctionCall( None, op, @@ -430,10 +450,36 @@ def visit_tag_key(self, node: Node, children: Sequence[Any]) -> str: return node.text def visit_tag_value( - self, node: Node, children: Sequence[Sequence[str]] - ) -> Union[str, Sequence[str]]: - tag_value = children[0] - return tag_value + self, node: Node, children: Sequence[tuple[bool, Union[str, Sequence[str]]]] + ) -> tuple[bool, Union[str, Sequence[str]]]: + contains_wildcard, tag_value = children[0] + return contains_wildcard, tag_value + + def visit_quoted_suffix_wildcard_tag_value( + self, node: Node, children: Sequence[Any] + ) -> tuple[bool, str]: + _, text_before_wildcard, _, _ = children + rhs = f"{text_before_wildcard}%" + return True, rhs + + def visit_suffix_wildcard_tag_value( + self, node: Node, children: Sequence[Any] + ) -> tuple[bool, str]: + text_before_wildcard, _ = children + rhs = f"{text_before_wildcard}%" + return True, rhs + + def visit_quoted_string_filter( + self, node: Node, children: Sequence[Any] + ) -> tuple[bool, str]: + text = str(node.text[1:-1]) + match = text.replace('\\"', '"') + return False, match + + def visit_unquoted_string_filter( + self, node: Node, children: Sequence[Any] + ) -> tuple[bool, str]: + return False, str(node.text) def visit_unquoted_string(self, node: Node, children: Sequence[Any]) -> str: assert isinstance(node.text, str) @@ -444,9 +490,11 @@ def visit_quoted_string(self, node: Node, children: Sequence[Any]) -> str: match = str(node.text[1:-1]).replace('\\"', '"') return match - def visit_string_tuple(self, node: Node, children: Sequence[Any]) -> Sequence[str]: + def visit_string_tuple( + self, node: Node, children: Sequence[Any] + ) -> tuple[bool, Sequence[str]]: _, _, first, zero_or_more_others, _, _ = children - return [first[0], *(v[0] for _, _, _, v in zero_or_more_others)] + return False, [first[0], *(v[0] for _, _, _, v in zero_or_more_others)] def visit_group_by_name(self, node: Node, children: Sequence[Any]) -> str: assert isinstance(node.text, str) @@ -1047,7 +1095,7 @@ def populate_query_from_mql_context( def quantiles_to_quantile( - query: Union[CompositeQuery[LogicalDataSource], LogicalQuery] + query: Union[CompositeQuery[LogicalDataSource], LogicalQuery], ) -> None: """ Changes quantiles(0.5)(...) to arrayElement(quantiles(0.5)(...), 1). This is to simplify From d73aec6f5359b4bd5cc8adec40a5b41d45f464c8 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Mon, 10 Jun 2024 15:53:29 +0200 Subject: [PATCH 2/7] add FilterFactorValue in accordance with snuba-sdk --- snuba/query/mql/parser.py | 42 ++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/snuba/query/mql/parser.py b/snuba/query/mql/parser.py index e3021f0f0c..f0b4cbb95a 100644 --- a/snuba/query/mql/parser.py +++ b/snuba/query/mql/parser.py @@ -7,6 +7,7 @@ import sentry_sdk from parsimonious.exceptions import IncompleteParseError from parsimonious.nodes import Node, NodeVisitor +from snuba_sdk import BooleanCondition, Condition from snuba_sdk.metrics_visitors import AGGREGATE_ALIAS from snuba_sdk.mql.mql import MQL_GRAMMAR @@ -342,12 +343,13 @@ def visit_filter_factor( # If we have a parenthesized expression, we just return it. return factor - condition_op, lhs, _, _, _, rhs = factor + condition_op, lhs, _, _, _, filter_factor_value = factor condition_op_value = ( "!" if len(condition_op) == 1 and condition_op[0] == "!" else "" ) - contains_wildcard, rhs = rhs + contains_wildcard = filter_factor_value.contains_wildcard + rhs = filter_factor_value.value if contains_wildcard: if not condition_op_value: @@ -450,36 +452,36 @@ def visit_tag_key(self, node: Node, children: Sequence[Any]) -> str: return node.text def visit_tag_value( - self, node: Node, children: Sequence[tuple[bool, Union[str, Sequence[str]]]] - ) -> tuple[bool, Union[str, Sequence[str]]]: - contains_wildcard, tag_value = children[0] - return contains_wildcard, tag_value + self, node: Node, children: Sequence[FilterFactorValue] + ) -> FilterFactorValue: + filter_factor_value = children[0] + return filter_factor_value def visit_quoted_suffix_wildcard_tag_value( self, node: Node, children: Sequence[Any] - ) -> tuple[bool, str]: + ) -> FilterFactorValue: _, text_before_wildcard, _, _ = children rhs = f"{text_before_wildcard}%" - return True, rhs + return FilterFactorValue(rhs, True) def visit_suffix_wildcard_tag_value( self, node: Node, children: Sequence[Any] - ) -> tuple[bool, str]: + ) -> FilterFactorValue: text_before_wildcard, _ = children rhs = f"{text_before_wildcard}%" - return True, rhs + return FilterFactorValue(rhs, True) def visit_quoted_string_filter( self, node: Node, children: Sequence[Any] - ) -> tuple[bool, str]: + ) -> FilterFactorValue: text = str(node.text[1:-1]) match = text.replace('\\"', '"') - return False, match + return FilterFactorValue(match, False) def visit_unquoted_string_filter( self, node: Node, children: Sequence[Any] - ) -> tuple[bool, str]: - return False, str(node.text) + ) -> FilterFactorValue: + return FilterFactorValue(str(node.text), False) def visit_unquoted_string(self, node: Node, children: Sequence[Any]) -> str: assert isinstance(node.text, str) @@ -492,9 +494,11 @@ def visit_quoted_string(self, node: Node, children: Sequence[Any]) -> str: def visit_string_tuple( self, node: Node, children: Sequence[Any] - ) -> tuple[bool, Sequence[str]]: + ) -> FilterFactorValue: _, _, first, zero_or_more_others, _, _ = children - return False, [first[0], *(v[0] for _, _, _, v in zero_or_more_others)] + return FilterFactorValue( + [first[0], *(v[0] for _, _, _, v in zero_or_more_others)], False + ) def visit_group_by_name(self, node: Node, children: Sequence[Any]) -> str: assert isinstance(node.text, str) @@ -1257,3 +1261,9 @@ def _process_data( _post_process(query, VALIDATORS) return query + + +@dataclass +class FilterFactorValue(object): + value: str | Sequence[str] | Condition | BooleanCondition + contains_wildcard: bool From a99b6a27e4955564000ed6ba65ef4811d523de06 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Tue, 11 Jun 2024 12:58:32 +0200 Subject: [PATCH 3/7] add wildcard awareness to mql parser and tests --- snuba/query/mql/parser.py | 1 + tests/query/parser/test_parser.py | 208 ++++++++++++++++++++++++++++++ 2 files changed, 209 insertions(+) diff --git a/snuba/query/mql/parser.py b/snuba/query/mql/parser.py index f0b4cbb95a..0bbb84cc5a 100644 --- a/snuba/query/mql/parser.py +++ b/snuba/query/mql/parser.py @@ -352,6 +352,7 @@ def visit_filter_factor( rhs = filter_factor_value.value if contains_wildcard: + rhs = rhs[:-1] + "%" if not condition_op_value: op = ConditionFunctions.LIKE elif condition_op_value == "!": diff --git a/tests/query/parser/test_parser.py b/tests/query/parser/test_parser.py index bf3722da87..20d233cde9 100644 --- a/tests/query/parser/test_parser.py +++ b/tests/query/parser/test_parser.py @@ -160,6 +160,214 @@ def test_mql() -> None: assert eq, reason +def test_mql_wildcards() -> None: + mql = 'sum(`d:transactions/duration@millisecond`){mytag:"before_wildcard_*"}' + context = { + "start": "2021-01-01T00:00:00", + "end": "2021-01-02T00:00:00", + "rollup": { + "orderby": "ASC", + "granularity": 60, + "interval": None, + "with_totals": None, + }, + "scope": { + "org_ids": [1], + "project_ids": [1], + "use_case_id": "transactions", + }, + "limit": None, + "offset": None, + "indexer_mappings": { + "d:transactions/duration@millisecond": 123456, + "mytag": 42, + }, + } + expected = Query( + QueryEntity( + EntityKey.GENERIC_METRICS_DISTRIBUTIONS, + get_entity(EntityKey.GENERIC_METRICS_DISTRIBUTIONS).get_data_model(), + ), + selected_columns=[ + SelectedExpression( + "aggregate_value", + FunctionCall( + "_snuba_aggregate_value", + "sum", + (Column("_snuba_value", None, "value"),), + ), + ), + ], + groupby=[], + condition=and_cond( + and_cond( + and_cond( + f.equals( + column("granularity", None, "_snuba_granularity"), literal(60) + ), + in_cond( + column("project_id", None, "_snuba_project_id"), + f.tuple(literal(1)), + ), + ), + and_cond( + in_cond( + column("org_id", None, "_snuba_org_id"), f.tuple(literal(1)) + ), + f.equals( + column("use_case_id", None, "_snuba_use_case_id"), + literal("transactions"), + ), + ), + ), + and_cond( + and_cond( + f.greaterOrEquals( + column("timestamp", None, "_snuba_timestamp"), + literal(datetime(2021, 1, 1, 0, 0)), + ), + f.less( + column("timestamp", None, "_snuba_timestamp"), + literal(datetime(2021, 1, 2, 0, 0)), + ), + ), + and_cond( + f.equals( + column("metric_id", None, "_snuba_metric_id"), literal(123456) + ), + f.like(tags_raw["42"], literal("before_wildcard_%")), + ), + ), + ), + order_by=[ + OrderBy( + OrderByDirection.ASC, + FunctionCall( + alias="_snuba_aggregate_value", + function_name="sum", + parameters=( + ( + Column( + alias="_snuba_value", + table_name=None, + column_name="value", + ), + ) + ), + ), + ), + ], + limit=1000, + ) + actual = parse_mql_query(mql, context, get_dataset("generic_metrics")) + eq, reason = actual.equals(expected) + assert eq, reason + + +def test_mql_negated_wildcards() -> None: + mql = 'sum(`d:transactions/duration@millisecond`){!mytag:"before_wildcard_*"}' + context = { + "start": "2021-01-01T00:00:00", + "end": "2021-01-02T00:00:00", + "rollup": { + "orderby": "ASC", + "granularity": 60, + "interval": None, + "with_totals": None, + }, + "scope": { + "org_ids": [1], + "project_ids": [1], + "use_case_id": "transactions", + }, + "limit": None, + "offset": None, + "indexer_mappings": { + "d:transactions/duration@millisecond": 123456, + "mytag": 42, + }, + } + expected = Query( + QueryEntity( + EntityKey.GENERIC_METRICS_DISTRIBUTIONS, + get_entity(EntityKey.GENERIC_METRICS_DISTRIBUTIONS).get_data_model(), + ), + selected_columns=[ + SelectedExpression( + "aggregate_value", + FunctionCall( + "_snuba_aggregate_value", + "sum", + (Column("_snuba_value", None, "value"),), + ), + ), + ], + groupby=[], + condition=and_cond( + and_cond( + and_cond( + f.equals( + column("granularity", None, "_snuba_granularity"), literal(60) + ), + in_cond( + column("project_id", None, "_snuba_project_id"), + f.tuple(literal(1)), + ), + ), + and_cond( + in_cond( + column("org_id", None, "_snuba_org_id"), f.tuple(literal(1)) + ), + f.equals( + column("use_case_id", None, "_snuba_use_case_id"), + literal("transactions"), + ), + ), + ), + and_cond( + and_cond( + f.greaterOrEquals( + column("timestamp", None, "_snuba_timestamp"), + literal(datetime(2021, 1, 1, 0, 0)), + ), + f.less( + column("timestamp", None, "_snuba_timestamp"), + literal(datetime(2021, 1, 2, 0, 0)), + ), + ), + and_cond( + f.equals( + column("metric_id", None, "_snuba_metric_id"), literal(123456) + ), + f.notLike(tags_raw["42"], literal("before_wildcard_%")), + ), + ), + ), + order_by=[ + OrderBy( + OrderByDirection.ASC, + FunctionCall( + alias="_snuba_aggregate_value", + function_name="sum", + parameters=( + ( + Column( + alias="_snuba_value", + table_name=None, + column_name="value", + ), + ) + ), + ), + ), + ], + limit=1000, + ) + actual = parse_mql_query(mql, context, get_dataset("generic_metrics")) + eq, reason = actual.equals(expected) + assert eq, reason + + def test_formula_mql() -> None: mql_context = { "entity": "generic_metrics_distributions", From 977c1255256060c0a7c2c90f615062ae426fbeee Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 11 Jul 2024 10:53:39 +0200 Subject: [PATCH 4/7] bump snuba-sdk to 2.0.35 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index acfb6d0328..a69d4c2593 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,7 +28,7 @@ sentry-redis-tools==0.3.0 sentry-relay==0.8.44 sentry-sdk==1.40.5 simplejson==3.17.6 -snuba-sdk==2.0.34 +snuba-sdk==2.0.35 structlog==22.3.0 structlog-sentry==2.0.0 sql-metadata==2.6.0 From 323a51cdf3acc16a7ab4bf605c5e76cdefed2bab Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 11 Jul 2024 14:45:45 +0200 Subject: [PATCH 5/7] fix typing issues --- snuba/query/mql/parser.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/snuba/query/mql/parser.py b/snuba/query/mql/parser.py index 0bbb84cc5a..908657a6a4 100644 --- a/snuba/query/mql/parser.py +++ b/snuba/query/mql/parser.py @@ -2,7 +2,7 @@ import logging from dataclasses import dataclass, replace -from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Union +from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Union, cast import sentry_sdk from parsimonious.exceptions import IncompleteParseError @@ -336,14 +336,20 @@ def visit_filter_term(self, node: Node, children: Sequence[Any]) -> Any: def visit_filter_factor( self, node: Node, - children: Tuple[Sequence[Union[str, Sequence[str]]] | FunctionCall, Any], + children: Tuple[ + Sequence[Union[str, Sequence[str], FilterFactorValue]] | FunctionCall, Any + ], ) -> FunctionCall: factor, *_ = children if isinstance(factor, FunctionCall): # If we have a parenthesized expression, we just return it. return factor - condition_op, lhs, _, _, _, filter_factor_value = factor + condition_op: str = str(factor[0]) + lhs: str = str(factor[1]) + filter_factor_value: FilterFactorValue = cast(FilterFactorValue, factor[4]) + + # condition_op, _, _, _, filter_factor_value = factor condition_op_value = ( "!" if len(condition_op) == 1 and condition_op[0] == "!" else "" ) @@ -352,6 +358,7 @@ def visit_filter_factor( rhs = filter_factor_value.value if contains_wildcard: + rhs = cast(str, rhs) rhs = rhs[:-1] + "%" if not condition_op_value: op = ConditionFunctions.LIKE From e4f8a842fd6024ecf2501c0ec0eb3f9b18455650 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 11 Jul 2024 16:38:57 +0200 Subject: [PATCH 6/7] fix typing issues --- snuba/query/mql/parser.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/snuba/query/mql/parser.py b/snuba/query/mql/parser.py index 908657a6a4..868e18843e 100644 --- a/snuba/query/mql/parser.py +++ b/snuba/query/mql/parser.py @@ -2,7 +2,7 @@ import logging from dataclasses import dataclass, replace -from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Union, cast +from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Union import sentry_sdk from parsimonious.exceptions import IncompleteParseError @@ -337,7 +337,7 @@ def visit_filter_factor( self, node: Node, children: Tuple[ - Sequence[Union[str, Sequence[str], FilterFactorValue]] | FunctionCall, Any + Sequence[str | Sequence[str] | FilterFactorValue] | FunctionCall, Any ], ) -> FunctionCall: factor, *_ = children @@ -345,11 +345,11 @@ def visit_filter_factor( # If we have a parenthesized expression, we just return it. return factor - condition_op: str = str(factor[0]) - lhs: str = str(factor[1]) - filter_factor_value: FilterFactorValue = cast(FilterFactorValue, factor[4]) + condition_op: str + lhs: str + filter_factor_value: FilterFactorValue - # condition_op, _, _, _, filter_factor_value = factor + condition_op, lhs, _, _, _, filter_factor_value = factor # type: ignore condition_op_value = ( "!" if len(condition_op) == 1 and condition_op[0] == "!" else "" ) @@ -357,8 +357,7 @@ def visit_filter_factor( contains_wildcard = filter_factor_value.contains_wildcard rhs = filter_factor_value.value - if contains_wildcard: - rhs = cast(str, rhs) + if contains_wildcard and isinstance(rhs, str): rhs = rhs[:-1] + "%" if not condition_op_value: op = ConditionFunctions.LIKE From 6f9b8d49b3fc8e301117a7e0678ed3c3f1331edc Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Fri, 12 Jul 2024 09:22:06 +0200 Subject: [PATCH 7/7] add support for wildcards in parser_supported_join --- snuba/query/mql/parser_supported_join.py | 75 +++++++++++++++++++++--- 1 file changed, 67 insertions(+), 8 deletions(-) diff --git a/snuba/query/mql/parser_supported_join.py b/snuba/query/mql/parser_supported_join.py index 314ff414ed..1bd2dffc93 100644 --- a/snuba/query/mql/parser_supported_join.py +++ b/snuba/query/mql/parser_supported_join.py @@ -63,6 +63,7 @@ start_end_time_condition, ) from snuba.query.mql.mql_context import MQLContext +from snuba.query.mql.parser import FilterFactorValue from snuba.query.parser.exceptions import ParsingException from snuba.query.processors.logical.filter_in_select_optimizer import ( FilterInSelectOptimizer, @@ -283,21 +284,49 @@ def visit_filter_term(self, node: Node, children: Sequence[Any]) -> Any: def visit_filter_factor( self, node: Node, - children: Tuple[Sequence[Union[str, Sequence[str]]] | FunctionCall, Any], + children: Tuple[ + Sequence[str | Sequence[str] | FilterFactorValue] | FunctionCall, Any + ], ) -> FunctionCall: factor, *_ = children if isinstance(factor, FunctionCall): # If we have a parenthesized expression, we just return it. return factor - condition_op, lhs, _, _, _, rhs = factor + + condition_op: str + lhs: str + filter_factor_value: FilterFactorValue + + condition_op, lhs, _, _, _, filter_factor_value = factor # type: ignore condition_op_value = ( "!" if len(condition_op) == 1 and condition_op[0] == "!" else "" ) + + contains_wildcard = filter_factor_value.contains_wildcard + rhs = filter_factor_value.value + + if contains_wildcard and isinstance(rhs, str): + rhs = rhs[:-1] + "%" + if not condition_op_value: + op = ConditionFunctions.LIKE + elif condition_op_value == "!": + op = ConditionFunctions.NOT_LIKE + + return FunctionCall( + None, + op, + ( + Column(None, None, lhs[0]), + Literal(None, rhs), + ), + ) + if isinstance(rhs, list): if not condition_op_value: op = ConditionFunctions.IN elif condition_op_value == "!": op = ConditionFunctions.NOT_IN + return FunctionCall( None, op, @@ -378,10 +407,36 @@ def visit_tag_key(self, node: Node, children: Sequence[Any]) -> str: return node.text def visit_tag_value( - self, node: Node, children: Sequence[Sequence[str]] - ) -> Union[str, Sequence[str]]: - tag_value = children[0] - return tag_value + self, node: Node, children: Sequence[FilterFactorValue] + ) -> FilterFactorValue: + filter_factor_value = children[0] + return filter_factor_value + + def visit_quoted_suffix_wildcard_tag_value( + self, node: Node, children: Sequence[Any] + ) -> FilterFactorValue: + _, text_before_wildcard, _, _ = children + rhs = f"{text_before_wildcard}%" + return FilterFactorValue(rhs, True) + + def visit_suffix_wildcard_tag_value( + self, node: Node, children: Sequence[Any] + ) -> FilterFactorValue: + text_before_wildcard, _ = children + rhs = f"{text_before_wildcard}%" + return FilterFactorValue(rhs, True) + + def visit_quoted_string_filter( + self, node: Node, children: Sequence[Any] + ) -> FilterFactorValue: + text = str(node.text[1:-1]) + match = text.replace('\\"', '"') + return FilterFactorValue(match, False) + + def visit_unquoted_string_filter( + self, node: Node, children: Sequence[Any] + ) -> FilterFactorValue: + return FilterFactorValue(str(node.text), False) def visit_unquoted_string(self, node: Node, children: Sequence[Any]) -> str: assert isinstance(node.text, str) @@ -392,9 +447,13 @@ def visit_quoted_string(self, node: Node, children: Sequence[Any]) -> str: match = str(node.text[1:-1]).replace('\\"', '"') return match - def visit_string_tuple(self, node: Node, children: Sequence[Any]) -> Sequence[str]: + def visit_string_tuple( + self, node: Node, children: Sequence[Any] + ) -> FilterFactorValue: _, _, first, zero_or_more_others, _, _ = children - return [first[0], *(v[0] for _, _, _, v in zero_or_more_others)] + return FilterFactorValue( + [first[0], *(v[0] for _, _, _, v in zero_or_more_others)], False + ) def visit_group_by_name(self, node: Node, children: Sequence[Any]) -> str: assert isinstance(node.text, str)