Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions .github/workflows/job_test_api_local.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ jobs:
uses: ./.github/actions/setup-go
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
- name: Install Goose
run: go install github.com/pressly/goose/v3/cmd/goose@latest
- name: Build
run: pnpm turbo run build --filter=./apps/api...
env:
Expand All @@ -42,12 +40,6 @@ jobs:
env:
DRIZZLE_DATABASE_URL: "mysql://unkey:password@localhost:3306/unkey"
CI: 1
- name: Migrate ClickHouse
run: goose up
env:
GOOSE_DRIVER: clickhouse
GOOSE_DBSTRING: "tcp://default:password@127.0.0.1:9000"
GOOSE_MIGRATION_DIR: ./internal/clickhouse/schema
- name: Test
run: pnpm vitest run -c vitest.integration.ts --bail=1
working-directory: apps/api
Expand Down
6 changes: 0 additions & 6 deletions .github/workflows/job_test_unit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,6 @@ jobs:
run: |
sudo chmod 666 /var/run/docker.sock
docker version
- name: Install goose
run: |
wget -qO- https://github.com/pressly/goose/releases/download/v3.20.0/goose_linux_x86_64 > /tmp/goose
chmod +x /tmp/goose
sudo mv /tmp/goose /usr/local/bin/goose
goose --version
- name: Setup Node
uses: ./.github/actions/setup-node
with:
Expand Down
12 changes: 0 additions & 12 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,6 @@ down:
up: down build
docker compose -f ./deployment/docker-compose.yaml up -d

migrate-clickhouse:
@export GOOSE_DRIVER=clickhouse && \
export GOOSE_DBSTRING="tcp://default:password@127.0.0.1:9000" && \
export GOOSE_MIGRATION_DIR=./internal/clickhouse/schema && \
goose up

migrate-clickhouse-reset:
@export GOOSE_DRIVER=clickhouse && \
export GOOSE_DBSTRING="tcp://default:password@127.0.0.1:9000" && \
export GOOSE_MIGRATION_DIR=./internal/clickhouse/schema && \
goose down-to 0

integration: up
@cd apps/api && \
$(MAKE) seed && \
Expand Down
12 changes: 0 additions & 12 deletions apps/engineering/content/docs/contributing/testing.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,3 @@ Full end to end tests require a running API either locally or in a deployed envi
UNKEY_ROOT_KEY=
pnpm test:integration
```


### Clickhouse Tests

`/internal/clickhouse` is where we store our ClickHouse queries. To run tests locally, please install [goose](https://pressly.github.io/goose).

To install `goose`:

```bash
go install github.com/pressly/goose/v4/cmd/goose@latest
```

8 changes: 8 additions & 0 deletions deployment/Dockerfile.clickhouse
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Base image pinned to an exact version for reproducible builds.
FROM bitnami/clickhouse:25.6.4

# Bake the SQL schema files into the image. init-clickhouse.sh iterates over
# this directory (one subdirectory per database) and applies each *.sql file.
COPY go/pkg/clickhouse/schema/databases/ /opt/clickhouse-schemas/

# Create initialization script that will execute our SQL files on first run;
# scripts placed in /docker-entrypoint-initdb.d/ are picked up by the image's
# entrypoint. (script is already made executable on host, so no chmod needed)
COPY deployment/init-clickhouse.sh /docker-entrypoint-initdb.d/init-clickhouse.sh
56 changes: 44 additions & 12 deletions deployment/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,23 @@ services:
volumes:
- mysql:/var/lib/mysql
healthcheck:
test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-u", "root", "-proot"]
test:
["CMD", "mysqladmin", "ping", "-h", "localhost", "-u", "root", "-proot"]
timeout: 20s
retries: 10
start_period: 40s
interval: 10s
planetscale:
container_name: planetscale
image: ghcr.io/mattrobenolt/ps-http-sim:v0.0.12
command: ["-listen-port=3900", "-mysql-dbname=unkey", "-mysql-addr=mysql", "-mysql-max-rows=100000", "-mysql-idle-timeout=1s"]
command:
[
"-listen-port=3900",
"-mysql-dbname=unkey",
"-mysql-addr=mysql",
"-mysql-max-rows=100000",
"-mysql-idle-timeout=1s",
]
depends_on:
mysql:
condition: service_healthy
Expand Down Expand Up @@ -102,8 +110,10 @@ services:
VAULT_MASTER_KEYS: "Ch9rZWtfMmdqMFBJdVhac1NSa0ZhNE5mOWlLSnBHenFPENTt7an5MRogENt9Si6wms4pQ2XIvqNSIgNpaBenJmXgcInhu6Nfv2U="
CLICKHOUSE_URL: "clickhouse://default:password@clickhouse:9000"
clickhouse:
image: bitnami/clickhouse:25.6.4
container_name: clickhouse
build:
context: ..
dockerfile: deployment/Dockerfile.clickhouse
environment:
CLICKHOUSE_ADMIN_USER: default
CLICKHOUSE_ADMIN_PASSWORD: password
Expand All @@ -113,18 +123,23 @@ services:
volumes:
- clickhouse:/bitnami/clickhouse
healthcheck:
test: ["CMD", "clickhouse-client", "--host", "localhost", "--user", "default", "--password", "password", "--query", "SELECT 1"]
test:
[
"CMD",
"clickhouse-client",
"--host",
"localhost",
"--user",
"default",
"--password",
"password",
"--query",
"SELECT 1",
]
timeout: 10s
retries: 10
start_period: 30s
interval: 5s
clickhouse_migrator:
container_name: clickhouse_migrator
build:
context: ../internal/clickhouse
dockerfile: ./Dockerfile
depends_on:
- clickhouse
s3:
container_name: s3
image: bitnami/minio:2025.4.3
Expand All @@ -151,7 +166,24 @@ services:
dockerfile: ./apps/api/Dockerfile.dev
ports:
- 8787:8787
command: ["pnpm", "wrangler", "dev", "--ip=0.0.0.0", "--var=VERSION:1", "--var=DATABASE_HOST:planetscale:3900", "--var=DATABASE_USERNAME:unkey", "--var=DATABASE_PASSWORD:password", "--var=AGENT_URL:http://agent:8080", "--var=AGENT_TOKEN:agent-auth-secret", "--var=EMIT_METRICS_LOGS:false", "--var=SYNC_RATELIMIT_ON_NO_DATA:1.0", "--var=CLICKHOUSE_URL:http://default:password@clickhouse:8123", "--var=CLICKHOUSE_INSERT_URL:http://apiv2:7070", "--var=CLICKHOUSE_PROXY_TOKEN:chproxy-test-token-123"]
command:
[
"pnpm",
"wrangler",
"dev",
"--ip=0.0.0.0",
"--var=VERSION:1",
"--var=DATABASE_HOST:planetscale:3900",
"--var=DATABASE_USERNAME:unkey",
"--var=DATABASE_PASSWORD:password",
"--var=AGENT_URL:http://agent:8080",
"--var=AGENT_TOKEN:agent-auth-secret",
"--var=EMIT_METRICS_LOGS:false",
"--var=SYNC_RATELIMIT_ON_NO_DATA:1.0",
"--var=CLICKHOUSE_URL:http://default:password@clickhouse:8123",
"--var=CLICKHOUSE_INSERT_URL:http://apiv2:7070",
"--var=CLICKHOUSE_PROXY_TOKEN:chproxy-test-token-123",
]
depends_on:
- planetscale
- agent
Expand Down
25 changes: 25 additions & 0 deletions deployment/init-clickhouse.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash
# Applies the ClickHouse schema files baked into the image by
# deployment/Dockerfile.clickhouse. Runs from /docker-entrypoint-initdb.d/
# on first container initialization.
set -e

# Fail fast if the required credentials are missing, without echoing their
# values; otherwise clickhouse-client fails later with a confusing error.
if [ -z "${CLICKHOUSE_ADMIN_USER:-}" ] || [ -z "${CLICKHOUSE_ADMIN_PASSWORD:-}" ]; then
    echo "CLICKHOUSE_ADMIN_USER and CLICKHOUSE_ADMIN_PASSWORD must be set" >&2
    exit 1
fi

echo "Initializing ClickHouse schemas..."

# Execute SQL files in order from our schemas directory
for db_dir in /opt/clickhouse-schemas/*/; do
    if [ -d "$db_dir" ]; then
        echo "Processing database directory: $db_dir"

        # Execute SQL files in numerical order (the glob expands lexically,
        # so numeric prefixes like 001_, 002_ keep ordering deterministic).
        for sql_file in "$db_dir"*.sql; do
            # -f also filters out the unexpanded literal pattern when the
            # directory contains no .sql files, so no extra suffix check
            # is needed.
            if [ -f "$sql_file" ]; then
                echo "Executing: $sql_file"

                if ! clickhouse-client --host localhost --user "$CLICKHOUSE_ADMIN_USER" --password "$CLICKHOUSE_ADMIN_PASSWORD" --queries-file "$sql_file"; then
                    echo "Error executing $sql_file - stopping initialization"
                    exit 1
                fi
Comment on lines +16 to +19
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick (assertive)

Surface clearer errors and avoid leaking secrets

  • Add an explicit check for required env vars before running.
  • Avoid echoing credentials on failure. The current code doesn’t echo them, which is good; keep it that way.
+if [[ -z "${CLICKHOUSE_ADMIN_USER:-}" || -z "${CLICKHOUSE_ADMIN_PASSWORD:-}" ]]; then
+  echo "CLICKHOUSE_ADMIN_USER/CLICKHOUSE_ADMIN_PASSWORD must be set" >&2
+  exit 1
+fi

I can wire this near the top if you prefer.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if ! clickhouse-client --host localhost --user "$CLICKHOUSE_ADMIN_USER" --password "$CLICKHOUSE_ADMIN_PASSWORD" --queries-file "$sql_file"; then
echo "Error executing $sql_file - stopping initialization"
exit 1
fi
# At the top of deployment/init-clickhouse.sh, ensure credentials are set
if [[ -z "${CLICKHOUSE_ADMIN_USER:-}" || -z "${CLICKHOUSE_ADMIN_PASSWORD:-}" ]]; then
echo "CLICKHOUSE_ADMIN_USER/CLICKHOUSE_ADMIN_PASSWORD must be set" >&2
exit 1
fi
if ! clickhouse-client --host localhost --user "$CLICKHOUSE_ADMIN_USER" --password "$CLICKHOUSE_ADMIN_PASSWORD" --queries-file "$sql_file"; then
echo "Error executing $sql_file - stopping initialization"
exit 1
fi
🤖 Prompt for AI Agents
In deployment/init-clickhouse.sh around lines 16 to 19, add an explicit
pre-check at the top of the script for required env vars (at minimum
CLICKHOUSE_ADMIN_USER and CLICKHOUSE_ADMIN_PASSWORD, optionally
CLICKHOUSE_HOST/CLICKHOUSE_PORT) and exit with a generic error if any are
missing; do not print or interpolate the secret values in error messages or
logs, and ensure the clickhouse-client invocation is not run under shell tracing
(avoid set -x) so credentials are never echoed on failure.

fi
done
fi
done

echo "ClickHouse schema initialization complete!"
11 changes: 0 additions & 11 deletions go/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,6 @@ pull:

up: pull
@docker compose -f ../deployment/docker-compose.yaml up -d planetscale mysql redis clickhouse s3 otel
@echo "Starting ClickHouse migrations (will retry if ClickHouse isn't ready)..."
@for i in {1..10}; do \
echo "Migration attempt $$i..."; \
if docker compose -f ../deployment/docker-compose.yaml run --rm clickhouse_migrator; then \
echo "Migrations completed successfully!"; \
break; \
else \
echo "Migration failed, retrying in 5 seconds..."; \
sleep 5; \
fi; \
done

clean:
@docker compose -f ../deployment/docker-compose.yaml down --volumes
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
CREATE TABLE IF NOT EXISTS verifications.raw_key_verifications_v2(
-- the api request id, so we can correlate the verification with traces and logs
request_id String,

-- unix milli
time Int64 CODEC(Delta, LZ4),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick (assertive)

Consider DateTime64 for time to avoid repeated conversions

Storing time as DateTime64(3) can reduce repeated fromUnixTimestamp64Milli() conversions in PARTITION BY/TTL and downstream MVs.

-    time Int64 CODEC(Delta, LZ4),
+    time DateTime64(3) CODEC(Delta, LZ4),

This implies adjusting ingestion to write DateTime64(3) and updating PARTITION BY/TTL accordingly.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
time Int64 CODEC(Delta, LZ4),
time DateTime64(3) CODEC(Delta, LZ4),
🤖 Prompt for AI Agents
In
go/pkg/clickhouse/schema/databases/001_verifications/002_raw_key_verifications_v2.sql
around line 6, the time column is defined as "time Int64 CODEC(Delta, LZ4)";
change it to DateTime64(3) to avoid repeated fromUnixTimestamp64Milli()
conversions, update ingestion pipelines to write timestamps as DateTime64(3)
(milliseconds precision), and adjust any PARTITION BY and TTL expressions and
downstream materialized views to use the DateTime64(3) column type (and revise
functions that previously expected Int64 unixms to use the native DateTime64
functions).


workspace_id String,
key_space_id String,
-- Empty string if the key has no identity
identity_id String,
key_id String,

-- Right now this is a 3 character airport code, but when we move to aws,
-- this will be the region code such as `us-east-1`
region LowCardinality(String),

-- Examples:
-- - "VALID"
-- - "RATE_LIMITED"
-- - "EXPIRED"
-- - "DISABLED"
outcome LowCardinality(String),


tags Array(String) DEFAULT [],

Comment on lines +26 to +27
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick (assertive)

Canonicalize tags array

Same concern as aggregated tables: ensure a stable order (e.g., sort at ingestion) to avoid spurious distinctions in downstream aggregates that include tags in GROUP BY.

🤖 Prompt for AI Agents
go/pkg/clickhouse/schema/databases/001_verifications/002_raw_key_verifications_v2.sql
around lines 26-27: the tags column is defined as Array(String) DEFAULT [] but
currently not canonicalized which can cause spurious distinctions in GROUP BY;
ensure tags are stored in a stable order by sorting (and optionally
deduplicating) them at ingestion or via the table definition — update the
insert/ingestion pipeline to apply an ArraySort (and ArrayDistinct if desired)
to the tags value before writing, or add a materialized/DEFAULT expression that
stores tags as a sorted (and deduplicated) array so downstream aggregates remain
deterministic.

-- The number of credits spent on this verification
-- 0 means no credits were spent
spent_credits Int64,

-- Latency in milliseconds for this verification
latency Float64
Comment on lines +32 to +33
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick (assertive)

Latency unit and numeric type

Latency is documented as milliseconds; Float64 is fine. If you plan to store integers (ms) only, consider UInt32 to save space and use quantileTDigest on toFloat64(latency) in MVs. Otherwise, keep Float64 but document whether sub-millisecond precision is expected.

🤖 Prompt for AI Agents
In
go/pkg/clickhouse/schema/databases/001_verifications/002_raw_key_verifications_v2.sql
around lines 32-33, the column definition documents latency as milliseconds but
uses Float64; decide whether sub-millisecond precision is required — if latency
will be recorded as integer milliseconds change the column type to UInt32 to
save space and update any materialized views to compute quantileTDigest using
toFloat64(latency); if sub-millisecond precision is needed keep Float64 but add
a comment in the schema clarifying that sub-millisecond values are expected and
ensure MV queries treat it as Float64.



)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(fromUnixTimestamp64Milli(time))
ORDER BY (workspace_id, time, key_space_id, identity_id, key_id)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick (assertive)

Validate ORDER BY against query patterns

Primary key is (workspace_id, time, key_space_id, identity_id, key_id). If common queries filter by workspace_id + time range, this is good. If region/outcome are often used in filters before identity_id/key_id, consider including them in PK suffix or adding appropriate skip indexes.

Share typical WHERE clauses; I can propose a PK and secondary index layout aligned to real workloads.

TTL fromUnixTimestamp64Milli(time) + INTERVAL 100 DAY
SETTINGS non_replicated_deduplication_window = 10000
Comment on lines +38 to +41
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick (assertive)

Deduplication setting may not be effective without enabling non-replicated dedup

non_replicated_deduplication_window works when non-replicated dedup is enabled. Confirm your server config sets use_non_replicated_mergetree_deduplication=1, or set it per table if permissible.

If dedup of repeated inserts by request_id is required, consider ReplacingMergeTree with a version column or enforce dedup at ingestion.

🤖 Prompt for AI Agents
In
go/pkg/clickhouse/schema/databases/001_verifications/002_raw_key_verifications_v2.sql
around lines 38 to 41, the table sets non_replicated_deduplication_window but
that has no effect unless non-replicated deduplication is enabled; either ensure
the ClickHouse server config enables
use_non_replicated_mergetree_deduplication=1 (verify and document this), or add
the per-table setting use_non_replicated_mergetree_deduplication=1 if your
ClickHouse version supports it; alternatively, if request_id-based dedupe is
required, switch to ReplacingMergeTree with a version column or deduplicate at
ingestion rather than relying on non-replicated deduplication.

;

ALTER TABLE verifications.raw_key_verifications_v2
ADD INDEX IF NOT EXISTS idx_request_id (request_id) TYPE minmax GRANULARITY 1;
Comment on lines +44 to +45
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Use a Bloom filter skip index for request_id instead of minmax

minmax over String provides little selectivity for equality lookups. Bloom filter indexes are the standard choice for tracing-style IDs.

-ALTER TABLE verifications.raw_key_verifications_v2
-ADD INDEX IF NOT EXISTS idx_request_id (request_id) TYPE minmax GRANULARITY 1;
+ALTER TABLE verifications.raw_key_verifications_v2
+ADD INDEX IF NOT EXISTS idx_request_id (request_id) TYPE bloom_filter GRANULARITY 4;

Optionally add a secondary index on outcome for common filters:

ALTER TABLE verifications.raw_key_verifications_v2
ADD INDEX IF NOT EXISTS idx_outcome (outcome) TYPE set(100) GRANULARITY 4;
🤖 Prompt for AI Agents
In
go/pkg/clickhouse/schema/databases/001_verifications/002_raw_key_verifications_v2.sql
around lines 44-45, the ALTER TABLE adds a minmax index on request_id which is
ineffective for equality lookups; change the index to a Bloom filter skip index
for request_id (use an appropriate false-positive rate and granularity for your
workload) so equality/trace-id queries are selective, and optionally add a
secondary SET index on outcome (with an appropriate cardinality and GRANULARITY)
for common outcome filters.

Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Minute-level rollup of key verifications, populated by
-- key_verifications_per_minute_mv_v2 from raw_key_verifications_v2.
CREATE TABLE IF NOT EXISTS verifications.key_verifications_per_minute_v2
(
-- start of the one-minute bucket (truncated upstream via toStartOfMinute)
time DateTime,
workspace_id String,
key_space_id String,
-- empty string when the key has no identity (mirrors the raw table)
identity_id String,
key_id String,
-- e.g. "VALID", "RATE_LIMITED", "EXPIRED", "DISABLED"
outcome LowCardinality(String),
tags Array(String),
count Int64,
spent_credits Int64,
-- NOTE(review): SummingMergeTree sums every numeric column when merging rows
-- with equal sorting keys. That is correct for count/spent_credits but wrong
-- for these finalized, non-additive metrics — averages/quantiles inflate as
-- parts merge. Consider AggregatingMergeTree with aggregate-function states
-- (avgState / quantileTDigestState) instead — TODO confirm before relying on
-- these columns.
latency_avg Float64,
latency_p75 Float64,
latency_p99 Float64
)
ENGINE = SummingMergeTree()
ORDER BY (workspace_id, time, key_space_id, identity_id, key_id, tags, outcome)
Comment on lines +3 to +17
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

⚠️ Potential issue

Use AggregatingMergeTree with stateful metrics; avoid summing finalized averages/quantiles.

Same issue as ratelimits: SummingMergeTree will corrupt latency_* in merges. Also switch counters to unsigned and consider a normalized key for tags.

Proposed DDL:

 CREATE TABLE IF NOT EXISTS verifications.key_verifications_per_minute_v2
 (
   time          DateTime,
   workspace_id  String,
   key_space_id  String,
   identity_id   String,
   key_id        String,
   outcome       LowCardinality(String),
-  tags          Array(String),
-  count         Int64,
-  spent_credits Int64,
-  latency_avg   Float64,
-  latency_p75   Float64,
-  latency_p99   Float64
+  tags          Array(String),
+  tags_hash     UInt64 MATERIALIZED cityHash64(arrayStringConcat(arraySort(tags))),
+  count         SimpleAggregateFunction(sum, UInt64),
+  spent_credits SimpleAggregateFunction(sum, UInt64),
+  latency_avg_state AggregateFunction(avg, Float64),
+  latency_p75_state AggregateFunction(quantileTDigest(0.75), Float64),
+  latency_p99_state AggregateFunction(quantileTDigest(0.99), Float64)
 )
-ENGINE = SummingMergeTree()
-ORDER BY (workspace_id, time, key_space_id, identity_id, key_id, tags, outcome)
+ENGINE = AggregatingMergeTree()
+PARTITION BY toYYYYMM(time)
+ORDER BY (workspace_id, time, key_space_id, identity_id, key_id, tags_hash, outcome)
 ;
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
time DateTime,
workspace_id String,
key_space_id String,
identity_id String,
key_id String,
outcome LowCardinality(String),
tags Array(String),
count Int64,
spent_credits Int64,
latency_avg Float64,
latency_p75 Float64,
latency_p99 Float64
)
ENGINE = SummingMergeTree()
ORDER BY (workspace_id, time, key_space_id, identity_id, key_id, tags, outcome)
CREATE TABLE IF NOT EXISTS verifications.key_verifications_per_minute_v2
(
time DateTime,
workspace_id String,
key_space_id String,
identity_id String,
key_id String,
outcome LowCardinality(String),
tags Array(String),
tags_hash UInt64 MATERIALIZED cityHash64(arrayStringConcat(arraySort(tags))),
count SimpleAggregateFunction(sum, UInt64),
spent_credits SimpleAggregateFunction(sum, UInt64),
latency_avg_state AggregateFunction(avg, Float64),
latency_p75_state AggregateFunction(quantileTDigest(0.75), Float64),
latency_p99_state AggregateFunction(quantileTDigest(0.99), Float64)
)
ENGINE = AggregatingMergeTree()
PARTITION BY toYYYYMM(time)
ORDER BY (workspace_id, time, key_space_id, identity_id, key_id, tags_hash, outcome)
;
🤖 Prompt for AI Agents
In
go/pkg/clickhouse/schema/databases/001_verifications/003_key_verifications_per_minute_v2.sql
around lines 3-17, replace the SummingMergeTree with an AggregatingMergeTree and
change types to use stateful aggregates and unsigned counters: convert count and
spent_credits to UInt64, replace latency_avg/latency_p75/latency_p99 with
appropriate AggregateFunction states (e.g., AggregateFunction(avgState, Float64)
and AggregateFunction(quantilesState, Float64) or quantileState variants), stop
storing finalized averages/quantiles as Float64, and adjust the ORDER BY to
avoid putting raw tags there (either remove tags from ORDER BY or use a
normalized tag key) — ensure the DDL stores aggregation states so merges compute
metrics correctly and consider normalizing tags (or moving to a tag mapping
table) before including them in the primary key.

;
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
-- +goose up
CREATE MATERIALIZED VIEW IF NOT EXISTS verifications.key_verifications_per_minute_mv_v1
TO verifications.key_verifications_per_minute_v1
CREATE MATERIALIZED VIEW IF NOT EXISTS verifications.key_verifications_per_minute_mv_v2
TO verifications.key_verifications_per_minute_v2
AS
SELECT
workspace_id,
Expand All @@ -9,9 +8,13 @@ SELECT
key_id,
outcome,
count(*) as count,
sum(spent_credits) as spent_credits,
avg(latency) as latency_avg,
quantileTDigest(0.75)(latency) as latency_p75,
quantileTDigest(0.99)(latency) as latency_p99,
Comment on lines +11 to +14
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Verification agent

🧩 Analysis chain

Emit aggregate states instead of finalized metrics; avoid “count” as a column name.

  • avg()/quantileTDigest() produce finalized values inappropriate for downstream rollups; emit states so higher-level tables (hour/day/month) can aggregate correctly.
  • Prefer alias “total” over “count” to avoid confusion with the SQL function and improve readability (ensure the target table column name matches).

Apply:

-  count(*) as count,
-  sum(spent_credits) as spent_credits,
-  avg(latency) as latency_avg,
-  quantileTDigest(0.75)(latency) as latency_p75,
-  quantileTDigest(0.99)(latency) as latency_p99,
+  count(*)                                     AS total,
+  sum(spent_credits)                           AS spent_credits,
+  avgState(latency)                            AS latency_avg_state,
+  quantileTDigestState(latency)                AS latency_tdigest_state,

Ensure verifications.key_verifications_per_minute_v2 uses AggregatingMergeTree with:

  • total, spent_credits as SimpleAggregateFunction(sum, …)
  • latency_avg_state as AggregateFunction(avg, Float64)
  • latency_tdigest_state as AggregateFunction(quantileTDigest, Float64)

Also applies to: 17-17


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Display the first 200 lines of the view definition
echo "=== File head (lines 1–200) ==="
sed -n '1,200p' go/pkg/clickhouse/schema/databases/001_verifications/004_key_verifications_per_minute_mv_v2.sql || true

# Show all ENGINE definitions in the file
echo
echo "=== ENGINE definitions ==="
grep -n -i "ENGINE" go/pkg/clickhouse/schema/databases/001_verifications/004_key_verifications_per_minute_mv_v2.sql || true

# Check for AggregatingMergeTree usage
echo
echo "=== AggregatingMergeTree occurrences ==="
grep -n -i "AggregatingMergeTree" go/pkg/clickhouse/schema/databases/001_verifications/004_key_verifications_per_minute_mv_v2.sql || true

# Show the exact column list where aggregate functions are declared
echo
echo "=== Aggregate column definitions around the snippet ==="
grep -n -C3 -E "sum\\(spent_credits\\)|avg\\(latency\\)|quantileTDigest" go/pkg/clickhouse/schema/databases/001_verifications/004_key_verifications_per_minute_mv_v2.sql || true

Length of output: 1885


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== Searching for target table creation in 001_verifications ==="
grep -R -n "CREATE TABLE.*key_verifications_per_minute_v2" go/pkg/clickhouse/schema/databases/001_verifications || true

echo
echo "=== Searching for any reference to key_verifications_per_minute_v2 in entire schema directory ==="
grep -R -n "key_verifications_per_minute_v2" go/pkg/clickhouse/schema || true

Length of output: 1038


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== 003_key_verifications_per_minute_v2 table definition (first 200 lines) ==="
sed -n '1,200p' go/pkg/clickhouse/schema/databases/001_verifications/003_key_verifications_per_minute_v2.sql || true

Length of output: 795


Update MV and table to emit aggregate states and use AggregatingMergeTree

The current materialized view is emitting finalized metrics (avg(), quantileTDigest()) into a SummingMergeTree, which prevents correct downstream rollups. We need to:

  • In 004_key_verifications_per_minute_mv_v2.sql (the MV):
    • Rename count → total
    • Replace avg(latency) → avgState(latency)
    • Replace both quantileTDigest(…) calls → quantileTDigestState(latency)
  • In 003_key_verifications_per_minute_v2.sql (the target table):
    • Change count Int64 → total SimpleAggregateFunction(count, UInt64) (or SimpleAggregateFunction(sum, UInt64) if summing the states)
    • Change spent_credits Int64 → spent_credits SimpleAggregateFunction(sum, Int64)
    • Change latency_avg Float64 → latency_avg_state AggregateFunction(avg, Float64)
    • Change latency_p75 Float64 and latency_p99 Float64 → latency_tdigest_state AggregateFunction(quantileTDigest, Float64)
    • Switch ENGINE = SummingMergeTree() → ENGINE = AggregatingMergeTree()

Locations:

  • go/pkg/clickhouse/schema/databases/001_verifications/004_key_verifications_per_minute_mv_v2.sql (lines 10–14)
  • go/pkg/clickhouse/schema/databases/001_verifications/003_key_verifications_per_minute_v2.sql (column definitions and ENGINE clause)
🤖 Prompt for AI Agents
In
go/pkg/clickhouse/schema/databases/001_verifications/004_key_verifications_per_minute_mv_v2.sql
around lines 10–14 and in
go/pkg/clickhouse/schema/databases/001_verifications/003_key_verifications_per_minute_v2.sql
(column defs and ENGINE clause), update the MV to emit aggregate states and
switch the target table to AggregatingMergeTree: in the MV rename count → total,
change avg(latency) → avgState(latency), and replace quantileTDigest(...) →
quantileTDigestState(latency); in the target table change count Int64 → total
SimpleAggregateFunction(count, UInt64) (or SimpleAggregateFunction(sum, UInt64)
if summing states), spent_credits Int64 → spent_credits
SimpleAggregateFunction(sum, Int64), latency_avg Float64 → latency_avg_state
AggregateFunction(avg, Float64), latency_p75/latency_p99 Float64 →
latency_tdigest_state AggregateFunction(quantileTDigest, Float64), and change
ENGINE = SummingMergeTree() to ENGINE = AggregatingMergeTree().

toStartOfMinute(fromUnixTimestamp64Milli(time)) AS time,
tags
FROM verifications.raw_key_verifications_v1
FROM verifications.raw_key_verifications_v2
GROUP BY
workspace_id,
key_space_id,
Expand All @@ -20,7 +23,4 @@ GROUP BY
outcome,
time,
tags
;

-- +goose down
DROP VIEW verifications.key_verifications_per_minute_mv_v1;
;
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
CREATE TABLE IF NOT EXISTS verifications.key_verifications_per_hour_v2
(
time DateTime,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick (assertive)

Consider DateTime('UTC') for stored bucket timestamps

If all bucketing is meant to be UTC, declare time as DateTime('UTC') for clarity and to prevent accidental timezone conversion.

-  time          DateTime,
+  time          DateTime('UTC'),
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
time DateTime,
time DateTime('UTC'),
🤖 Prompt for AI Agents
In
go/pkg/clickhouse/schema/databases/001_verifications/005_key_verifications_per_hour_v2.sql
around line 3, the column "time" is declared as DateTime without an explicit
timezone; if bucketing is intended to be UTC, change the column type to
DateTime('UTC') to make UTC semantics explicit and avoid accidental timezone
conversion—update the column definition to use DateTime('UTC') and verify any
insert/selects or materialized views still handle UTC values consistently.

workspace_id String,
key_space_id String,
identity_id String,
key_id String,
outcome LowCardinality(String),
tags Array(String),
count Int64,
spent_credits Int64,
latency_avg Float64,
latency_p75 Float64,
latency_p99 Float64
Comment on lines +12 to +14
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

latency_avg/p75/p99 are not additive — SummingMergeTree will corrupt these over time

With SummingMergeTree, background merges sum numeric columns for identical primary keys. That is correct for count/spent_credits, but wrong for avg and quantiles. This will produce inflated averages/quantiles as more parts merge.

Two viable fixes:

  • Preferred: use AggregatingMergeTree and store aggregate states.
  • Alternative: keep SummingMergeTree but store merge-safe fields (e.g., latency_sum, latency_count) and compute avg at read time; quantiles still require state or different strategy.

Preferred table DDL change:

 CREATE TABLE IF NOT EXISTS verifications.key_verifications_per_hour_v2
 (
   time          DateTime,
   workspace_id  String,
   key_space_id  String,
   identity_id   String,
   key_id        String,
   outcome       LowCardinality(String),
   tags          Array(String),
-  count         Int64,
-  spent_credits Int64,
-  latency_avg       Float64,
-  latency_p75       Float64,
-  latency_p99       Float64
+  count         SimpleAggregateFunction(sum, UInt64),
+  spent_credits SimpleAggregateFunction(sum, Int64),
+  latency_avg   AggregateFunction(avg, Float64),
+  latency_p75   AggregateFunction(quantileTDigest(0.75), Float64),
+  latency_p99   AggregateFunction(quantileTDigest(0.99), Float64)
 )
-ENGINE = SummingMergeTree()
-ORDER BY (workspace_id, time, key_space_id, identity_id, key_id)
+ENGINE = AggregatingMergeTree()
+ORDER BY (workspace_id, time, key_space_id, identity_id, key_id, outcome)
 ;

Follow-up: update the hour/day/month MVs to emit avgState()/quantileTDigestState() as suggested in those review comments.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
latency_avg Float64,
latency_p75 Float64,
latency_p99 Float64
CREATE TABLE IF NOT EXISTS verifications.key_verifications_per_hour_v2
(
time DateTime,
workspace_id String,
key_space_id String,
identity_id String,
key_id String,
outcome LowCardinality(String),
tags Array(String),
count SimpleAggregateFunction(sum, UInt64),
spent_credits SimpleAggregateFunction(sum, Int64),
latency_avg AggregateFunction(avg, Float64),
latency_p75 AggregateFunction(quantileTDigest(0.75), Float64),
latency_p99 AggregateFunction(quantileTDigest(0.99), Float64)
)
ENGINE = AggregatingMergeTree()
ORDER BY (workspace_id, time, key_space_id, identity_id, key_id, outcome)
;
🤖 Prompt for AI Agents
In
go/pkg/clickhouse/schema/databases/001_verifications/005_key_verifications_per_hour_v2.sql
around lines 12-14, the columns latency_avg, latency_p75, latency_p99 are
defined but SummingMergeTree will incorrectly sum these over merges; change the
schema to use AggregatingMergeTree and replace those Float64 columns with
aggregate-state columns (e.g., latency_avg_state AggregateFunction(avg, Float64)
and latency_p75_state/latency_p99_state AggregateFunction(quantileTDigest,
Float64)) so merge operations keep correct state, and update the hour/day/month
materialized views to emit avgState()/quantileTDigestState() into these fields;
alternatively, if you must keep SummingMergeTree, replace avg/quantile columns
with merge-safe fields (e.g., latency_sum Float64 and latency_count UInt64) and
compute avg at query time (note: quantiles still need state or a different
approach).

)
ENGINE = SummingMergeTree()
ORDER BY (workspace_id, time, key_space_id, identity_id, key_id)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Missing outcome (and likely tags) in ORDER BY merges dissimilar rows

The primary key omits outcome (and tags). MergeTree family merges are per primary key; omitting these dimensions will merge SUCCESS/ERROR (and different tag sets) into single rows, corrupting metrics.

Minimum fix (if you keep SummingMergeTree and finalized values, not recommended):

-ORDER BY (workspace_id, time, key_space_id, identity_id, key_id)
+ORDER BY (workspace_id, time, key_space_id, identity_id, key_id, outcome, tags)

Note: including Array(String) in ORDER BY increases index cardinality and may be undesirable or unsupported in your target CH version. A safer alternative is to include a stable tags_hash:

+  tags_hash UInt64 MATERIALIZED cityHash64(arrayStringConcat(arraySort(tags), ',')),
...
-ORDER BY (workspace_id, time, key_space_id, identity_id, key_id)
+ORDER BY (workspace_id, time, key_space_id, identity_id, key_id, outcome, tags_hash)

Choose one approach and make sure all corresponding MVs group by the same key set.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
ORDER BY (workspace_id, time, key_space_id, identity_id, key_id)
ORDER BY (workspace_id, time, key_space_id, identity_id, key_id, outcome, tags)
🤖 Prompt for AI Agents
In
go/pkg/clickhouse/schema/databases/001_verifications/005_key_verifications_per_hour_v2.sql
around line 17, the ORDER BY currently omits outcome (and tags) causing
SummingMergeTree merges to collapse SUCCESS/ERROR and different tag sets into
single rows; update the ORDER BY to include outcome and either the tags array or
a stable tags_hash (preferred) so the primary key fully distinguishes rows, and
ensure any corresponding materialized views use the identical grouping key set.

;
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
-- +goose up
CREATE MATERIALIZED VIEW verifications.key_verifications_per_hour_mv_v2
CREATE MATERIALIZED VIEW IF NOT EXISTS verifications.key_verifications_per_hour_mv_v2
TO verifications.key_verifications_per_hour_v2
AS
SELECT
Expand All @@ -9,9 +8,13 @@ SELECT
key_id,
outcome,
count(*) as count,
sum(spent_credits) as spent_credits,
avg(latency) as latency_avg,
quantileTDigest(0.75)(latency) as latency_p75,
quantileTDigest(0.99)(latency) as latency_p99,
toStartOfHour(fromUnixTimestamp64Milli(time)) AS time,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick (assertive)

Timezone determinism for hourly buckets

Normalize to UTC to avoid environment-specific hour boundaries.

-  toStartOfHour(fromUnixTimestamp64Milli(time)) AS time,
+  toStartOfHour(fromUnixTimestamp64Milli(time, 'UTC')) AS time,
🤖 Prompt for AI Agents
In
go/pkg/clickhouse/schema/databases/001_verifications/006_key_verifications_per_hour_mv_v2.sql
around line 15, the hourly bucketing uses
toStartOfHour(fromUnixTimestamp64Milli(time)) which is environment-dependent;
convert the timestamp to UTC before bucketing to ensure deterministic hour
boundaries across deployments (for example:
toStartOfHour(toTimeZone(fromUnixTimestamp64Milli(time), 'UTC')) AS time).

tags
Comment on lines +11 to 16
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Emit aggregate states to match an AggregatingMergeTree target

These finalized aggregates will not survive merges. Emit states so the table can merge them correctly.

-  count(*) as count,
-  sum(spent_credits) as spent_credits,
-  avg(latency) as latency_avg,
-  quantileTDigest(0.75)(latency) as latency_p75,
-  quantileTDigest(0.99)(latency) as latency_p99,
+  countState()                                          AS count,
+  sumState(spent_credits)                               AS spent_credits,
+  avgState(latency)                                     AS latency_avg,
+  quantileTDigestState(0.75)(latency)                   AS latency_p75,
+  quantileTDigestState(0.99)(latency)                   AS latency_p99,

Ensure the target table schema is updated accordingly (see comments on the table file).

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In
go/pkg/clickhouse/schema/databases/001_verifications/006_key_verifications_per_hour_mv_v2.sql
around lines 11–16 the SELECT emits finalized aggregates (sum, avg,
quantileTDigest) which will not survive AggregatingMergeTree merges; change
these to emit the corresponding aggregate states (e.g., sumState, avgState,
quantileTDigestState) for each metric (rename columns to *_state or otherwise
indicate they are states), keep the time and tags as-is, and ensure the
AggregatingMergeTree target table schema is updated to accept these state
columns with matching types so merges will correctly combine partial aggregates.

FROM verifications.raw_key_verifications_v1
FROM verifications.raw_key_verifications_v2
GROUP BY
workspace_id,
key_space_id,
Expand All @@ -21,7 +24,3 @@ GROUP BY
time,
tags
;


-- +goose down
DROP VIEW verifications.key_verifications_per_hour_mv_v2;
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
CREATE TABLE IF NOT EXISTS verifications.key_verifications_per_day_v4
(
time DateTime,
workspace_id String,
key_space_id String,
identity_id String,
key_id String,
outcome LowCardinality(String),
tags Array(String),
Comment on lines +3 to +9
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick (assertive)

Primary key includes Array(String) “tags”; canonicalize and consider footprint

Using Array(String) in ORDER BY inflates primary key and splits rows when tag order differs. Canonicalize tags in the MV (arraySort(tags)) and consider hashing tags for the PK if cardinality is high.

I can draft MV changes to apply arraySort(tags) at write time.

🤖 Prompt for AI Agents
In
go/pkg/clickhouse/schema/databases/001_verifications/007_key_verifications_per_day_v4.sql
around lines 3-9, the primary key/ORDER BY currently includes tags as
Array(String) which inflates the PK and causes row fragmentation when tag order
differs; canonicalize tags at write time by applying arraySort(tags) in the
materialized view that writes to this table, and if tags cardinality or size is
high replace or supplement the array in the PK with a fixed-size hash (e.g.,
sipHash64/xxHash64 of the sorted array) to keep the primary key compact and
stable.

count Int64,
spent_credits Int64,
latency_avg Float64,
latency_p75 Float64,
latency_p99 Float64
)
ENGINE = SummingMergeTree()
ORDER BY (workspace_id, time, key_space_id, identity_id, key_id, tags, outcome)
Comment on lines +16 to +17
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick (assertive)

Add partitioning for data pruning and manage retention

No PARTITION BY is set. Add PARTITION BY toYYYYMM(time) for day-level table to enable pruning and targeted retention policies.

-ENGINE = SummingMergeTree()
-ORDER BY (workspace_id, time, key_space_id, identity_id, key_id, tags, outcome)
+ENGINE = AggregatingMergeTree()
+PARTITION BY toYYYYMM(time)
+ORDER BY (workspace_id, time, key_space_id, identity_id, key_id, tags, outcome)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
ENGINE = SummingMergeTree()
ORDER BY (workspace_id, time, key_space_id, identity_id, key_id, tags, outcome)
ENGINE = AggregatingMergeTree()
PARTITION BY toYYYYMM(time)
ORDER BY (workspace_id, time, key_space_id, identity_id, key_id, tags, outcome)
🤖 Prompt for AI Agents
In
go/pkg/clickhouse/schema/databases/001_verifications/007_key_verifications_per_day_v4.sql
around lines 16-17, the table lacks a PARTITION BY clause; add "PARTITION BY
toYYYYMM(time)" immediately before the ORDER BY to enable day-level pruning and
make retention policies targetable. Ensure the partition expression uses the
existing "time" column and is placed in the CREATE TABLE definition prior to the
ORDER BY, keeping the SummingMergeTree() engine unchanged.

;
Comment on lines +1 to +18
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

⚠️ Potential issue

Averages and quantiles will be corrupted with SummingMergeTree

SummingMergeTree will sum latency_avg/p75/p99 across parts, yielding nonsensical values. Use AggregatingMergeTree with aggregate states, or store additive fields only (latency_sum, count) and compute on read. Quantiles require state columns.

Option A (preferred): AggregatingMergeTree with states:

-CREATE TABLE IF NOT EXISTS verifications.key_verifications_per_day_v4
+CREATE TABLE IF NOT EXISTS verifications.key_verifications_per_day_v4
 (
   time          DateTime,
   workspace_id  String,
   key_space_id  String,
   identity_id   String,
   key_id        String,
   outcome       LowCardinality(String),
-  tags          Array(String),
-  count         Int64,
-  spent_credits Int64,
-  latency_avg   Float64,
-  latency_p75   Float64,
-  latency_p99   Float64
+  tags          Array(String),
+  count         SimpleAggregateFunction(sum, UInt64),
+  spent_credits SimpleAggregateFunction(sum, UInt64),
+  latency_avg_state AggregateFunction(avg, Float64),
+  latency_p75_state AggregateFunction(quantileTDigest(0.75), Float64),
+  latency_p99_state AggregateFunction(quantileTDigest(0.99), Float64)
 )
-ENGINE = SummingMergeTree()
-ORDER BY (workspace_id, time, key_space_id, identity_id, key_id, tags, outcome)
+ENGINE = AggregatingMergeTree()
+ORDER BY (workspace_id, time, key_space_id, identity_id, key_id, tags, outcome)

This requires the MV to emit avgState/quantileTDigestState and queries to use finalizeAggregation() or *_Merge().

Option B: Keep SummingMergeTree but change schema:

  • Replace latency_avg with latency_sum Float64 and compute avg = latency_sum / count at read time.
  • Remove p75/p99 and instead store quantileTDigestState columns as above (requires AggregatingMergeTree).
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
CREATE TABLE IF NOT EXISTS verifications.key_verifications_per_day_v4
(
time DateTime,
workspace_id String,
key_space_id String,
identity_id String,
key_id String,
outcome LowCardinality(String),
tags Array(String),
count Int64,
spent_credits Int64,
latency_avg Float64,
latency_p75 Float64,
latency_p99 Float64
)
ENGINE = SummingMergeTree()
ORDER BY (workspace_id, time, key_space_id, identity_id, key_id, tags, outcome)
;
CREATE TABLE IF NOT EXISTS verifications.key_verifications_per_day_v4
(
time DateTime,
workspace_id String,
key_space_id String,
identity_id String,
key_id String,
outcome LowCardinality(String),
tags Array(String),
count SimpleAggregateFunction(sum, UInt64),
spent_credits SimpleAggregateFunction(sum, UInt64),
latency_avg_state AggregateFunction(avg, Float64),
latency_p75_state AggregateFunction(quantileTDigest(0.75), Float64),
latency_p99_state AggregateFunction(quantileTDigest(0.99), Float64)
)
ENGINE = AggregatingMergeTree()
ORDER BY (workspace_id, time, key_space_id, identity_id, key_id, tags, outcome)
;
🤖 Prompt for AI Agents
In
go/pkg/clickhouse/schema/databases/001_verifications/007_key_verifications_per_day_v4.sql
lines 1-18, the table uses SummingMergeTree while storing latency_avg,
latency_p75 and latency_p99 which will be incorrectly summed across parts;
change the schema to use AggregatingMergeTree with aggregate-state columns for
averages and quantiles (e.g., latency_avgState, latency_p75State,
latency_p99State) and adjust the MV to emit the corresponding State aggregates
so queries can finalizeAggregation()/*_Merge(), or alternatively keep
SummingMergeTree but replace latency_avg with latency_sum (Float64) and store
count, compute avg at read time and remove p75/p99 (or migrate those to TDigest
state + AggregatingMergeTree) — implement one of these options and update
materialized view and query logic accordingly.

Loading