From d9cdce9a987e67830c1c1970925c239fcb570b75 Mon Sep 17 00:00:00 2001 From: Volo Kluev Date: Thu, 25 Apr 2024 13:23:59 -0700 Subject: [PATCH] remove ConditionSimplifierProcessor --- .../entities/discover_transactions.yaml | 1 - .../transactions/entities/transactions.yaml | 1 - .../logical/condition_simplifier_processor.py | 83 ------------------- .../logical/low_cardinality_processor.py | 9 +- snuba/settings/__init__.py | 1 - snuba/settings/settings_test.py | 1 - tests/test_snql_api.py | 48 ----------- 7 files changed, 2 insertions(+), 142 deletions(-) delete mode 100644 snuba/query/processors/logical/condition_simplifier_processor.py diff --git a/snuba/datasets/configuration/discover/entities/discover_transactions.yaml b/snuba/datasets/configuration/discover/entities/discover_transactions.yaml index 26c776b231..51be0fd1e3 100644 --- a/snuba/datasets/configuration/discover/entities/discover_transactions.yaml +++ b/snuba/datasets/configuration/discover/entities/discover_transactions.yaml @@ -327,7 +327,6 @@ query_processors: - processor: BasicFunctionsProcessor - processor: ApdexProcessor - processor: FailureRateProcessor - - processor: ConditionSimplifierProcessor - processor: LowCardinalityProcessor args: columns: diff --git a/snuba/datasets/configuration/transactions/entities/transactions.yaml b/snuba/datasets/configuration/transactions/entities/transactions.yaml index d93027d6d2..88ef4a0cfc 100644 --- a/snuba/datasets/configuration/transactions/entities/transactions.yaml +++ b/snuba/datasets/configuration/transactions/entities/transactions.yaml @@ -359,7 +359,6 @@ query_processors: { processor: BasicFunctionsProcessor }, { processor: ApdexProcessor }, { processor: FailureRateProcessor }, - { processor: ConditionSimplifierProcessor }, { processor: LowCardinalityProcessor, args: diff --git a/snuba/query/processors/logical/condition_simplifier_processor.py b/snuba/query/processors/logical/condition_simplifier_processor.py deleted file mode 100644 index b397c706af..0000000000 --- a/snuba/query/processors/logical/condition_simplifier_processor.py +++ /dev/null @@ -1,83 +0,0 @@ -from snuba import state -from snuba.query.conditions import ( - combine_and_conditions, - get_first_level_and_conditions, - is_condition, -) -from snuba.query.expressions import Column, Expression, FunctionCall -from snuba.query.logical import Query -from snuba.query.processors.logical import LogicalQueryProcessor -from snuba.query.query_settings import QuerySettings - -_LOW_CARDINALITY_COLUMNS = set( - [ - "transaction_name", - "transaction_op", - "platform", - "environment", - "release", - "dist", - "sdk_name", - "sdk_version", - "http_method", - "type", - "app_start_type", - "transaction_source", - "version", - ] -) - - -class ConditionSimplifierProcessor(LogicalQueryProcessor): - """ - 22.8 has some problems when dealing with certain conditions on strings. - Specifically, a condition with multiple values on a string column, - e.g. release IN tuple('a', 'b', 'c') - will cause an error. - THis processor does two things: if the rhs is a single value, change the condition - to be equals(). Otherwise, flip the sides using the `has` operator. - So has(tuple('a', 'b', 'c'), release) instead of in(release, tuple('a', 'b', 'c')) - """ - - def process_query(self, query: Query, query_settings: QuerySettings) -> None: - def transform_cond(exp: Expression) -> Expression: - if not is_condition(exp): - return exp - - assert isinstance(exp, FunctionCall) - if not exp.function_name == "in" or len(exp.parameters) != 2: - return exp - - rhs = exp.parameters[1] - if not isinstance(rhs, FunctionCall) or rhs.function_name not in ( - "tuple", - "array", - ): - return exp - - lhs = exp.parameters[0] - if ( - not isinstance(lhs, Column) - or lhs.column_name not in _LOW_CARDINALITY_COLUMNS - ): - return exp - - if len(rhs.parameters) == 1: - return FunctionCall( - exp.alias, "equals", (exp.parameters[0], rhs.parameters[0]) - ) - - # `has` requires an array, so convert everything to an array - return FunctionCall( - exp.alias, - "has", - (FunctionCall(None, "array", rhs.parameters), exp.parameters[0]), - ) - - if state.get_int_config("use.condition.simplifier.processor", 1) == 0: - return - condition = query.get_condition() - if condition: - conditions = get_first_level_and_conditions(condition) - new_conditions = [transform_cond(c) for c in conditions] - query.set_ast_condition(combine_and_conditions(new_conditions)) diff --git a/snuba/query/processors/logical/low_cardinality_processor.py b/snuba/query/processors/logical/low_cardinality_processor.py index ed4a72a524..79833cbb83 100644 --- a/snuba/query/processors/logical/low_cardinality_processor.py +++ b/snuba/query/processors/logical/low_cardinality_processor.py @@ -1,6 +1,6 @@ from dataclasses import replace -from snuba import settings, state +from snuba import state from snuba.query.expressions import Column, Expression, FunctionCall, Literal from snuba.query.logical import Query from snuba.query.processors.logical import LogicalQueryProcessor @@ -41,12 +41,7 @@ def transform_expressions(exp: Expression) -> Expression: ), ) - if ( - state.get_int_config( - "use.low.cardinality.processor", settings.USE_CARDINALITY_CASTER - ) - == 0 - ): + if state.get_int_config("use.low.cardinality.processor", 1) == 0: return query.transform_expressions(transform_expressions) diff --git a/snuba/settings/__init__.py b/snuba/settings/__init__.py index 21d30ecccb..e869911feb 100644 --- a/snuba/settings/__init__.py +++ b/snuba/settings/__init__.py @@ -435,7 +435,6 @@ class RedisClusters(TypedDict): # yaml file as well because we validate them. By skipping these steps in production environments # we save ~2s on startup time VALIDATE_DATASET_YAMLS_ON_STARTUP = False -USE_CARDINALITY_CASTER = 0 USE_NEW_QUERY_PIPELINE_SAMPLE_RATE = 0.0 TRY_NEW_QUERY_PIPELINE_SAMPLE_RATE = 0.0 diff --git a/snuba/settings/settings_test.py b/snuba/settings/settings_test.py index 19d3bcd82d..94ad4feefa 100644 --- a/snuba/settings/settings_test.py +++ b/snuba/settings/settings_test.py @@ -70,4 +70,3 @@ VALIDATE_DATASET_YAMLS_ON_STARTUP = True USE_NEW_QUERY_PIPELINE_SAMPLE_RATE = 1.0 TRY_NEW_QUERY_PIPELINE_SAMPLE_RATE = 1.0 -USE_CARDINALITY_CASTER = 1 diff --git a/tests/test_snql_api.py b/tests/test_snql_api.py index 402c46cf12..55c030c2f6 100644 --- a/tests/test_snql_api.py +++ b/tests/test_snql_api.py @@ -1391,54 +1391,6 @@ def test_hexint_in_condition(self) -> None: data = json.loads(response.data) assert "9533608433997996441" in data["sql"], data["sql"] # Hexint was applied - def test_allocator_clickhouse_bug(self) -> None: - response = self.post( - "/discover/snql", - data=json.dumps( - { - "dataset": "events", - "query": f""" - MATCH (discover) - SELECT - divide(count(), divide(320519.0, 60)) AS `epm`, - quantile(0.5)(duration) AS `p50`, - countIf(notIn(transaction_status, tuple(2, 0, 1))) AS `failure_count`, - transaction BY transaction - WHERE - type = 'transaction' - AND release IN tuple('1123581321345589') - AND environment IN array('prod', 'dev') - AND timestamp >= toDateTime('{self.base_time.isoformat()}') - AND timestamp < toDateTime('{self.next_time.isoformat()}') - AND project_id IN tuple({self.project_id}) - HAVING - countIf(notIn(transaction_status, tuple(2, 0, 1))) AS `failure_count` > 0.0 - ORDER BY - countIf(notIn(transaction_status, tuple(2, 0, 1))) AS `failure_count` DESC - LIMIT - 6 OFFSET 0""", - "legacy": True, - "app_id": "legacy", - "tenant_ids": { - "organization_id": self.org_id, - "referrer": "join.tag.test", - }, - "parent_api": "/api/0/issues|groups/{issue_id}/tags/", - } - ), - ) - - assert response.status_code == 200 - data = json.loads(response.data) - assert ( - "equals((cast(release, 'Nullable(String)') AS _snuba_release), '1123581321345589')" - in data["sql"] - ) - assert ( - "has(['prod', 'dev'], (cast(environment, 'Nullable(String)') AS _snuba_environment))" - in data["sql"] - ) - def test_low_cardinality_processor(self) -> None: response = self.post( "/discover/snql",