From 7544baf4f1830679c89a0499d5cf7cf5e76623e5 Mon Sep 17 00:00:00 2001 From: Evan Hicks Date: Wed, 10 Apr 2024 12:08:55 -0400 Subject: [PATCH 1/3] feat(meta): Create distributions meta tables This creates the local table, distributed table and materialized view for the meta tables of the distributions cluster. --- snuba/migrations/group_loader.py | 1 + .../0036_distributions_meta_tables.py | 182 ++++++++++++++++++ 2 files changed, 183 insertions(+) create mode 100644 snuba/snuba_migrations/generic_metrics/0036_distributions_meta_tables.py diff --git a/snuba/migrations/group_loader.py b/snuba/migrations/group_loader.py index 76730fb425..ec6e28f43f 100644 --- a/snuba/migrations/group_loader.py +++ b/snuba/migrations/group_loader.py @@ -325,6 +325,7 @@ def get_migrations(self) -> Sequence[str]: "0033_counters_meta_tag_values_table", "0034_counters_meta_tag_values_table_mv", "0035_recreate_counters_meta_tag_value_table_mv", + "0036_distributions_meta_tables", ] diff --git a/snuba/snuba_migrations/generic_metrics/0036_distributions_meta_tables.py b/snuba/snuba_migrations/generic_metrics/0036_distributions_meta_tables.py new file mode 100644 index 0000000000..ef526c9530 --- /dev/null +++ b/snuba/snuba_migrations/generic_metrics/0036_distributions_meta_tables.py @@ -0,0 +1,182 @@ +from typing import Sequence + +from snuba.clickhouse.columns import AggregateFunction, Column, DateTime, String, UInt +from snuba.clusters.storage_sets import StorageSetKey +from snuba.migrations import migration, operations, table_engines +from snuba.migrations.columns import MigrationModifiers as Modifiers +from snuba.migrations.operations import OperationTarget +from snuba.utils.schemas import Float + + +class Migration(migration.ClickhouseNodeMigration): + blocking = False + granularity = "8192" + meta_view_name = "generic_metric_distributions_meta_mv" + meta_local_table_name = "generic_metric_distributions_meta_local" + meta_dist_table_name = "generic_metric_distributions_meta_dist" + meta_table_columns: Sequence[Column[Modifiers]] = [ + Column("org_id", UInt(64)), + Column("project_id", UInt(64)), + Column("use_case_id", String(Modifiers(low_cardinality=True))), + Column("metric_id", UInt(64)), + Column("tag_key", UInt(64)), + Column("timestamp", DateTime(modifiers=Modifiers(codecs=["DoubleDelta"]))), + Column("retention_days", UInt(16)), + Column("count", AggregateFunction("sum", [Float(64)])), + ] + + tag_value_view_name = "generic_metric_distributions_meta_tag_values_mv" + tag_value_local_table_name = "generic_metric_distributions_meta_tag_values_local" + tag_value_dist_table_name = "generic_metric_distributions_meta_tag_values_dist" + tag_value_table_columns: Sequence[Column[Modifiers]] = [ + Column("project_id", UInt(64)), + Column("metric_id", UInt(64)), + Column("tag_key", UInt(64)), + Column("tag_value", String()), + Column("timestamp", DateTime(modifiers=Modifiers(codecs=["DoubleDelta"]))), + Column("retention_days", UInt(16)), + Column("count", AggregateFunction("sum", [Float(64)])), + ] + + storage_set_key = StorageSetKey.GENERIC_METRICS_SETS + + def forwards_ops(self) -> Sequence[operations.SqlOperation]: + return [ + operations.CreateTable( + storage_set=self.storage_set_key, + table_name=self.meta_local_table_name, + engine=table_engines.AggregatingMergeTree( + storage_set=self.storage_set_key, + order_by="(org_id, project_id, use_case_id, metric_id, tag_key, timestamp)", + primary_key="(org_id, project_id, use_case_id, metric_id, tag_key, timestamp)", + partition_by="(retention_days, toMonday(timestamp))", + settings={"index_granularity": self.granularity}, + ttl="timestamp + toIntervalDay(retention_days)", + ), + columns=self.meta_table_columns, + target=OperationTarget.LOCAL, + ), + operations.CreateTable( + storage_set=self.storage_set_key, + table_name=self.meta_dist_table_name, + engine=table_engines.Distributed( + local_table_name=self.meta_local_table_name, sharding_key=None + ), + columns=self.meta_table_columns, + target=OperationTarget.DISTRIBUTED, + ), + operations.CreateMaterializedView( + storage_set=self.storage_set_key, + view_name=self.meta_view_name, + columns=self.meta_table_columns, + destination_table_name=self.meta_local_table_name, + target=OperationTarget.LOCAL, + query=""" + SELECT + org_id, + project_id, + use_case_id, + metric_id, + tag_key, + toStartOfWeek(timestamp) as timestamp, + retention_days, + sumState(count_value) as count + FROM generic_metric_distributions_raw_local + ARRAY JOIN tags.key AS tag_key + WHERE record_meta = 1 + GROUP BY + org_id, + project_id, + use_case_id, + metric_id, + tag_key, + timestamp, + retention_days + """, + ), + operations.CreateTable( + storage_set=self.storage_set_key, + table_name=self.tag_value_local_table_name, + engine=table_engines.AggregatingMergeTree( + storage_set=self.storage_set_key, + order_by="(project_id, metric_id, tag_key, tag_value, timestamp)", + primary_key="(project_id, metric_id, tag_key, tag_value, timestamp)", + partition_by="(retention_days, toMonday(timestamp))", + settings={"index_granularity": self.granularity}, + ttl="timestamp + toIntervalDay(retention_days)", + ), + columns=self.tag_value_table_columns, + target=OperationTarget.LOCAL, + ), + operations.CreateTable( + storage_set=self.storage_set_key, + table_name=self.tag_value_dist_table_name, + engine=table_engines.Distributed( + local_table_name=self.tag_value_local_table_name, sharding_key=None + ), + columns=self.tag_value_table_columns, + target=OperationTarget.DISTRIBUTED, + ), + operations.CreateMaterializedView( + storage_set=self.storage_set_key, + view_name=self.tag_value_view_name, + columns=self.tag_value_table_columns, + destination_table_name=self.tag_value_local_table_name, + target=OperationTarget.LOCAL, + query=""" + SELECT + project_id, + metric_id, + tag_key, + tag_value, + toStartOfWeek(timestamp) as timestamp, + retention_days, + sumState(count_value) as count + FROM generic_metric_distributions_raw_local + ARRAY JOIN + tags.key AS tag_key, tags.raw_value AS tag_value + WHERE record_meta = 1 + GROUP BY + project_id, + metric_id, + tag_key, + tag_value, + timestamp, + retention_days + """, + ), + ] + + def backwards_ops(self) -> Sequence[operations.SqlOperation]: + return [ + operations.DropTable( + storage_set=self.storage_set_key, + table_name=self.tag_value_view_name, + target=OperationTarget.LOCAL, + ), + operations.DropTable( + storage_set=self.storage_set_key, + table_name=self.tag_value_dist_table_name, + target=OperationTarget.DISTRIBUTED, + ), + operations.DropTable( + storage_set=self.storage_set_key, + table_name=self.tag_value_local_table_name, + target=OperationTarget.LOCAL, + ), + operations.DropTable( + storage_set=self.storage_set_key, + table_name=self.meta_view_name, + target=OperationTarget.LOCAL, + ), + operations.DropTable( + storage_set=self.storage_set_key, + table_name=self.meta_dist_table_name, + target=OperationTarget.DISTRIBUTED, + ), + operations.DropTable( + storage_set=self.storage_set_key, + table_name=self.meta_local_table_name, + target=OperationTarget.LOCAL, + ), + ] From 4cdaee8e28b4ea1dec1df83be8344a0a86740bd3 Mon Sep 17 00:00:00 2001 From: Evan Hicks Date: Wed, 10 Apr 2024 13:56:33 -0400 Subject: [PATCH 2/3] fix storage set --- .../generic_metrics/0036_distributions_meta_tables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snuba/snuba_migrations/generic_metrics/0036_distributions_meta_tables.py b/snuba/snuba_migrations/generic_metrics/0036_distributions_meta_tables.py index ef526c9530..58905ac23b 100644 --- a/snuba/snuba_migrations/generic_metrics/0036_distributions_meta_tables.py +++ b/snuba/snuba_migrations/generic_metrics/0036_distributions_meta_tables.py @@ -38,7 +38,7 @@ class Migration(migration.ClickhouseNodeMigration): Column("count", AggregateFunction("sum", [Float(64)])), ] - storage_set_key = StorageSetKey.GENERIC_METRICS_SETS + storage_set_key = StorageSetKey.GENERIC_METRICS_DISTRIBUTIONS def forwards_ops(self) -> Sequence[operations.SqlOperation]: return [ From fc5b7576fd03b23e7307bac71bdf28c1beca6f35 Mon Sep 17 00:00:00 2001 From: Evan Hicks Date: Tue, 23 Apr 2024 12:16:08 -0400 Subject: [PATCH 3/3] use 0045 --- snuba/migrations/group_loader.py | 2 +- ...butions_meta_tables.py => 0045_distributions_meta_tables.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename snuba/snuba_migrations/generic_metrics/{0042_distributions_meta_tables.py => 0045_distributions_meta_tables.py} (100%) diff --git a/snuba/migrations/group_loader.py b/snuba/migrations/group_loader.py index 8bdb614ef3..e7f6f679be 100644 --- a/snuba/migrations/group_loader.py +++ b/snuba/migrations/group_loader.py @@ -335,7 +335,7 @@ def get_migrations(self) -> Sequence[str]: "0039_add_record_meta_column_gauges", "0040_remove_counters_meta_tables", "0041_adjust_partitioning_meta_tables", - "0042_distributions_meta_tables", + "0045_distributions_meta_tables", ] diff --git a/snuba/snuba_migrations/generic_metrics/0042_distributions_meta_tables.py b/snuba/snuba_migrations/generic_metrics/0045_distributions_meta_tables.py similarity index 100% rename from snuba/snuba_migrations/generic_metrics/0042_distributions_meta_tables.py rename to snuba/snuba_migrations/generic_metrics/0045_distributions_meta_tables.py