diff --git a/.github/workflows/ci3.yml b/.github/workflows/ci3.yml
index a706e33fd021..8dbd61bd090c 100644
--- a/.github/workflows/ci3.yml
+++ b/.github/workflows/ci3.yml
@@ -88,6 +88,7 @@ jobs:
PR_COMMITS: ${{ github.event.pull_request.commits }}
PR_NUMBER: ${{ github.event.pull_request.number }}
GITHUB_REF_NAME: ${{ github.ref_name }}
+ GITHUB_ACTOR: ${{ github.actor }}
# NOTE: $CI_MODE is set in the Determine CI Mode step.
run: ./.github/ci3.sh $CI_MODE
diff --git a/ci3/aws_request_instance_type b/ci3/aws_request_instance_type
index 5f6aafbe4dd7..a48c5b58c241 100755
--- a/ci3/aws_request_instance_type
+++ b/ci3/aws_request_instance_type
@@ -86,6 +86,21 @@ if [ -z "${iid:-}" -o "${iid:-}" == "None" ]; then
echo $iid > $iid_path
fi
+tags="Key=Name,Value=$name Key=Group,Value=build-instance"
+[ -n "${GITHUB_ACTOR:-}" ] && tags+=" Key=GithubActor,Value=$GITHUB_ACTOR"
+[ -n "${CI_MODE:-}" ] && tags+=" Key=CICommand,Value=$CI_MODE"
+[ -n "${CI_DASHBOARD:-}" ] && tags+=" Key=Dashboard,Value=$CI_DASHBOARD"
+if [ "${UNSAFE_AWS_KEEP_ALIVE:-0}" -eq 1 ]; then
+ echo_stderr "You have set UNSAFE_AWS_KEEP_ALIVE=1, so the instance will not be terminated after 1.5 hours by the reaper script. Make sure you shut the machine down when done."
+ tags+=" Key=Keep-Alive,Value=true"
+fi
+aws ec2 create-tags --resources $iid --tags $tags
+
+# Record the instance type so callers can pass it downstream (e.g. into Docker).
+echo $instance_type > $state_dir/instance_type
+# Record whether this is spot or on-demand.
+[ -f "$sir_path" ] && echo spot > $state_dir/spot || echo ondemand > $state_dir/spot
+
while [ -z "${ip:-}" ]; do
sleep 1
ip=$(aws ec2 describe-instances \
diff --git a/ci3/bootstrap_ec2 b/ci3/bootstrap_ec2
index a24f0cfc177b..eeffb180d390 100755
--- a/ci3/bootstrap_ec2
+++ b/ci3/bootstrap_ec2
@@ -89,6 +89,8 @@ if [[ -f "$state_dir/sir" ]]; then
sir=$(cat $state_dir/sir)
fi
iid=$(cat $state_dir/iid)
+export EC2_INSTANCE_TYPE=$(cat $state_dir/instance_type 2>/dev/null || echo "unknown")
+export EC2_SPOT=$(cat $state_dir/spot 2>/dev/null || echo "unknown")
# If AWS credentials are not set, try to load them from ~/.aws/build_instance_credentials.
if [ -z "${AWS_ACCESS_KEY_ID:-}" ] || [ -z "${AWS_SECRET_ACCESS_KEY:-}" ]; then
@@ -192,16 +194,6 @@ container_script=$(
log_ci_run FAILED \$ci_log_id
merge_train_failure_slack_notify \$ci_log_id
release_canary_slack_notify \$ci_log_id
- ci_failed_data=\$(jq -n \\
- --arg status "failed" \\
- --arg log_id "\$ci_log_id" \\
- --arg ref_name "\${TARGET_BRANCH:-\$REF_NAME}" \\
- --arg commit_hash "\$COMMIT_HASH" \\
- --arg commit_author "\$COMMIT_AUTHOR" \\
- --arg commit_msg "\$COMMIT_MSG" \\
- --argjson exit_code "\$code" \\
- '{status: \$status, log_id: \$log_id, ref_name: \$ref_name, commit_hash: \$commit_hash, commit_author: \$commit_author, commit_msg: \$commit_msg, exit_code: \$exit_code, timestamp: now | todate}')
- redis_publish "ci:run:failed" "\$ci_failed_data"
;;
esac
exit \$code
@@ -331,6 +323,9 @@ function run {
-e AWS_TOKEN=\$aws_token \
-e NAMESPACE=${NAMESPACE:-} \
-e NETWORK=${NETWORK:-} \
+ -e GITHUB_ACTOR=${GITHUB_ACTOR:-} \
+ -e EC2_INSTANCE_TYPE=${EC2_INSTANCE_TYPE:-unknown} \
+ -e EC2_SPOT=${EC2_SPOT:-unknown} \
--pids-limit=65536 \
--shm-size=2g \
aztecprotocol/devbox:3.0 bash -c $(printf '%q' "$container_script")
diff --git a/ci3/ci-metrics/Dockerfile b/ci3/ci-metrics/Dockerfile
new file mode 100644
index 000000000000..4013545da66d
--- /dev/null
+++ b/ci3/ci-metrics/Dockerfile
@@ -0,0 +1,11 @@
+FROM python:3.12
+
+RUN apt-get update && apt-get install -y jq redis-tools && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+COPY requirements.txt requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt gunicorn
+RUN git config --global --add safe.directory /aztec-packages
+COPY . .
+EXPOSE 8081
+CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:8081", "app:app"]
diff --git a/ci3/ci-metrics/app.py b/ci3/ci-metrics/app.py
new file mode 100644
index 000000000000..c62875e7d19a
--- /dev/null
+++ b/ci3/ci-metrics/app.py
@@ -0,0 +1,848 @@
+from flask import Flask, request, Response, redirect
+from flask_compress import Compress
+from flask_httpauth import HTTPBasicAuth
+from datetime import datetime, timedelta
+import json
+import os
+import re
+import redis
+import threading
+from pathlib import Path
+
+import db
+import metrics
+import github_data
+import billing.aws as billing_aws
+from billing import (
+ get_billing_files_in_range,
+ aggregate_billing_weekly, aggregate_billing_monthly,
+ serve_billing_dashboard,
+)
+
+REDIS_HOST = os.getenv('REDIS_HOST', 'localhost')
+REDIS_PORT = int(os.getenv('REDIS_PORT', '6379'))
+LOGS_DISK_PATH = os.getenv('LOGS_DISK_PATH', '/logs-disk')
+DASHBOARD_PASSWORD = os.getenv('DASHBOARD_PASSWORD', 'password')
+
+r = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, decode_responses=False)
+
+app = Flask(__name__)
+Compress(app)
+auth = HTTPBasicAuth()
+
+
+@auth.verify_password
+def verify_password(username, password):
+ return password == DASHBOARD_PASSWORD
+
+
+def _init():
+ """Initialize SQLite and start background threads."""
+ try:
+ db.get_db()
+ metrics.start_test_listener(r)
+ metrics.start_ci_run_sync(r)
+ print("[ci-metrics] Background threads started")
+ except Exception as e:
+ print(f"[ci-metrics] Warning: startup failed: {e}")
+
+threading.Thread(target=_init, daemon=True, name='metrics-init').start()
+
+
+# ---- Helpers ----
+
+def _aggregate_dates(by_date_list, granularity, sum_fields, avg_fields=None):
+ """Aggregate a list of {date, ...} dicts by weekly/monthly granularity."""
+ if granularity == 'daily' or not by_date_list:
+ return by_date_list
+
+ buckets = {}
+ for entry in by_date_list:
+ d = datetime.strptime(entry['date'], '%Y-%m-%d')
+ if granularity == 'weekly':
+ key = (d - timedelta(days=d.weekday())).strftime('%Y-%m-%d')
+ else: # monthly
+ key = d.strftime('%Y-%m') + '-01'
+
+ if key not in buckets:
+ buckets[key] = {'date': key}
+ for f in sum_fields:
+ buckets[key][f] = 0
+ if avg_fields:
+ for f in avg_fields:
+ buckets[key][f'_avg_sum_{f}'] = 0
+ buckets[key][f'_avg_cnt_{f}'] = 0
+
+ for f in sum_fields:
+ buckets[key][f] += entry.get(f) or 0
+ if avg_fields:
+ for f in avg_fields:
+ val = entry.get(f)
+ if val is not None:
+ buckets[key][f'_avg_sum_{f}'] += val
+ buckets[key][f'_avg_cnt_{f}'] += 1
+
+ result = []
+ for key in sorted(buckets):
+ b = buckets[key]
+ out = {'date': b['date']}
+ for f in sum_fields:
+ out[f] = round(b[f], 2) if isinstance(b[f], float) else b[f]
+ if avg_fields:
+ for f in avg_fields:
+ cnt = b[f'_avg_cnt_{f}']
+ out[f] = round(b[f'_avg_sum_{f}'] / cnt, 1) if cnt else None
+ result.append(out)
+
+ return result
+
+
+def _json(data):
+ return Response(json.dumps(data), mimetype='application/json')
+
+
+# ---- Namespace billing ----
+
+@app.route('/namespace-billing')
+@auth.login_required
+def namespace_billing():
+ html = serve_billing_dashboard()
+ if html:
+ return html
+ return "Billing dashboard not found", 404
+
+
+@app.route('/api/billing/data')
+@auth.login_required
+def billing_data():
+ date_from_str = request.args.get('from')
+ date_to_str = request.args.get('to')
+ granularity = request.args.get('granularity', 'daily')
+
+ if not date_from_str or not date_to_str:
+ return _json({'error': 'from and to date params required (YYYY-MM-DD)'}), 400
+ try:
+ date_from = datetime.strptime(date_from_str, '%Y-%m-%d')
+ date_to = datetime.strptime(date_to_str, '%Y-%m-%d')
+ except ValueError:
+ return _json({'error': 'Invalid date format, use YYYY-MM-DD'}), 400
+
+ daily_data = get_billing_files_in_range(date_from, date_to)
+
+ # Filter out namespaces costing less than $1 total across the range
+ ns_totals = {}
+ for entry in daily_data:
+ for ns, ns_data in entry.get('namespaces', {}).items():
+ ns_totals[ns] = ns_totals.get(ns, 0) + ns_data.get('total', 0)
+ cheap_ns = {ns for ns, total in ns_totals.items() if total < 1.0}
+ if cheap_ns:
+ for entry in daily_data:
+ entry['namespaces'] = {ns: d for ns, d in entry.get('namespaces', {}).items()
+ if ns not in cheap_ns}
+
+ if granularity == 'weekly':
+ result = aggregate_billing_weekly(daily_data)
+ elif granularity == 'monthly':
+ result = aggregate_billing_monthly(daily_data)
+ else:
+ result = daily_data
+
+ return _json(result)
+
+
+# ---- CI runs ----
+
+@app.route('/api/ci/runs')
+@auth.login_required
+def api_ci_runs():
+ date_from = request.args.get('from', '')
+ date_to = request.args.get('to', '')
+ status_filter = request.args.get('status', '')
+ author = request.args.get('author', '')
+ dashboard = request.args.get('dashboard', '')
+ limit = min(int(request.args.get('limit', 100)), 1000)
+ offset = int(request.args.get('offset', 0))
+
+ ts_from = int(datetime.strptime(date_from, '%Y-%m-%d').timestamp() * 1000) if date_from else None
+ ts_to = int((datetime.strptime(date_to, '%Y-%m-%d') + timedelta(days=1)).timestamp() * 1000) if date_to else None
+
+ runs = metrics.get_ci_runs(r, ts_from, ts_to)
+
+ if status_filter:
+ runs = [run for run in runs if run.get('status') == status_filter]
+ if author:
+ runs = [run for run in runs if run.get('author') == author]
+ if dashboard:
+ runs = [run for run in runs if run.get('dashboard') == dashboard]
+
+ runs.sort(key=lambda x: x.get('timestamp', 0), reverse=True)
+ runs = runs[offset:offset + limit]
+
+ return _json(runs)
+
+
+@app.route('/api/ci/stats')
+@auth.login_required
+def api_ci_stats():
+ ts_from = int((datetime.now() - timedelta(days=7)).timestamp() * 1000)
+ runs = metrics.get_ci_runs(r, ts_from)
+
+ total = len(runs)
+ passed = sum(1 for run in runs if run.get('status') == 'PASSED')
+ failed = sum(1 for run in runs if run.get('status') == 'FAILED')
+ costs = [run['cost_usd'] for run in runs if run.get('cost_usd') is not None]
+ durations = []
+ for run in runs:
+ complete = run.get('complete')
+ ts = run.get('timestamp')
+ if complete and ts:
+ durations.append((complete - ts) / 60000.0)
+
+ return _json({
+ 'total_runs': total,
+ 'passed': passed,
+ 'failed': failed,
+ 'total_cost': round(sum(costs), 2) if costs else None,
+ 'avg_duration_mins': round(sum(durations) / len(durations), 1) if durations else None,
+ })
+
+
+# ---- Cost endpoints ----
+
+@app.route('/api/costs/overview')
+@auth.login_required
+def api_costs_overview():
+ date_from = request.args.get('from', (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'))
+ date_to = request.args.get('to', datetime.now().strftime('%Y-%m-%d'))
+ granularity = request.args.get('granularity', 'daily')
+ result = billing_aws.get_costs_overview(date_from, date_to)
+ if granularity != 'daily' and result.get('by_date'):
+ buckets = {}
+ for entry in result['by_date']:
+ d = datetime.strptime(entry['date'], '%Y-%m-%d')
+ if granularity == 'weekly':
+ key = (d - timedelta(days=d.weekday())).strftime('%Y-%m-%d')
+ else:
+ key = d.strftime('%Y-%m') + '-01'
+ if key not in buckets:
+ buckets[key] = {'date': key, 'aws': {}, 'gcp': {}, 'aws_total': 0, 'gcp_total': 0}
+ for cat, amt in entry.get('aws', {}).items():
+ buckets[key]['aws'][cat] = buckets[key]['aws'].get(cat, 0) + amt
+ for cat, amt in entry.get('gcp', {}).items():
+ buckets[key]['gcp'][cat] = buckets[key]['gcp'].get(cat, 0) + amt
+ buckets[key]['aws_total'] += entry.get('aws_total', 0)
+ buckets[key]['gcp_total'] += entry.get('gcp_total', 0)
+ result['by_date'] = sorted(buckets.values(), key=lambda x: x['date'])
+ return _json(result)
+
+
+@app.route('/api/costs/details')
+@auth.login_required
+def api_costs_details():
+ """Per-resource (USAGE_TYPE) cost breakdown."""
+ date_from = request.args.get('from', (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'))
+ date_to = request.args.get('to', datetime.now().strftime('%Y-%m-%d'))
+
+ rows = billing_aws.get_aws_cost_details(date_from, date_to)
+
+ usage_map = {}
+ for row in rows:
+ ut = row['usage_type']
+ if ut not in usage_map:
+ usage_map[ut] = {
+ 'usage_type': ut,
+ 'service': row['service'],
+ 'category': row['category'],
+ 'total': 0,
+ 'by_date': {},
+ 'is_ri': 'HeavyUsage' in ut,
+ }
+ usage_map[ut]['total'] += row['amount_usd']
+ d = row['date']
+ usage_map[ut]['by_date'][d] = usage_map[ut]['by_date'].get(d, 0) + row['amount_usd']
+
+ items = sorted(usage_map.values(), key=lambda x: -x['total'])
+ for item in items:
+ item['total'] = round(item['total'], 2)
+ item['by_date'] = {d: round(v, 4) for d, v in sorted(item['by_date'].items())}
+
+ all_dates = sorted({row['date'] for row in rows})
+ ri_items = [i for i in items if i['is_ri']]
+ ri_total = round(sum(i['total'] for i in ri_items), 2)
+
+ return _json({
+ 'items': items,
+ 'dates': all_dates,
+ 'ri_total': ri_total,
+ 'grand_total': round(sum(i['total'] for i in items), 2),
+ })
+
+
+@app.route('/api/costs/attribution')
+@auth.login_required
+def api_costs_attribution():
+ """CI cost attribution by user, branch, instance."""
+ date_from = request.args.get('from', (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'))
+ date_to = request.args.get('to', datetime.now().strftime('%Y-%m-%d'))
+ ts_from = int(datetime.strptime(date_from, '%Y-%m-%d').timestamp() * 1000)
+ ts_to = int((datetime.strptime(date_to, '%Y-%m-%d') + timedelta(days=1)).timestamp() * 1000)
+
+ runs = metrics.get_ci_runs(r, ts_from, ts_to)
+ runs_with_cost = [run for run in runs if run.get('cost_usd') is not None]
+
+ # Enrich merge queue runs with PR author from GitHub
+ pr_numbers = {run.get('pr_number') for run in runs_with_cost if run.get('pr_number')}
+ pr_authors = github_data.batch_get_pr_authors(pr_numbers)
+
+ granularity = request.args.get('granularity', 'daily')
+
+ instances = []
+ by_user = {}
+ by_branch = {}
+ by_type = {}
+ by_date_type = {}
+
+ for run in runs_with_cost:
+ info = billing_aws.decode_branch_info(run)
+ cost = run['cost_usd']
+ date = metrics._ts_to_date(run.get('timestamp', 0))
+
+ author = info['author']
+ prn = info['pr_number']
+        if prn and str(prn).isdigit() and int(prn) in pr_authors:
+ author = pr_authors[int(prn)]['author']
+
+ inst_type = run.get('instance_type', 'unknown')
+ vcpus = run.get('instance_vcpus')
+ if inst_type == 'unknown' and vcpus:
+ inst_type = f'{vcpus}vcpu'
+
+ instances.append({
+ 'instance_name': info['instance_name'],
+ 'date': date,
+ 'cost_usd': cost,
+ 'author': author,
+ 'branch': info['branch'],
+ 'pr_number': prn,
+ 'type': info['type'],
+ 'instance_type': inst_type,
+ 'spot': run.get('spot', False),
+ 'job_id': run.get('job_id', ''),
+ 'duration_mins': round((run.get('complete', 0) - run.get('timestamp', 0)) / 60000, 1) if run.get('complete') else None,
+ })
+
+ if author not in by_user:
+ by_user[author] = {'aws_cost': 0, 'gcp_cost': 0, 'runs': 0, 'by_date': {}}
+ by_user[author]['aws_cost'] += cost
+ by_user[author]['runs'] += 1
+ by_user[author]['by_date'][date] = by_user[author]['by_date'].get(date, 0) + cost
+
+ branch_key = info['branch'] or info['type']
+ if branch_key not in by_branch:
+ by_branch[branch_key] = {'cost': 0, 'runs': 0, 'type': info['type'], 'author': author}
+ by_branch[branch_key]['cost'] += cost
+ by_branch[branch_key]['runs'] += 1
+
+ rt = info['type']
+ if rt not in by_type:
+ by_type[rt] = {'cost': 0, 'runs': 0}
+ by_type[rt]['cost'] += cost
+ by_type[rt]['runs'] += 1
+
+ if date not in by_date_type:
+ by_date_type[date] = {}
+ by_date_type[date][rt] = by_date_type[date].get(rt, 0) + cost
+
+ # GCP costs — reported as total, no namespace→user heuristic
+ gcp_total = 0
+ try:
+ from billing.gcp import get_billing_files_in_range as get_gcp_billing
+ gcp_data = get_gcp_billing(
+ datetime.strptime(date_from, '%Y-%m-%d'),
+ datetime.strptime(date_to, '%Y-%m-%d'),
+ )
+ for entry in gcp_data:
+ for ns, ns_data in entry.get('namespaces', {}).items():
+ gcp_total += ns_data.get('total', 0)
+ except Exception as e:
+ print(f"[attribution] GKE billing error: {e}")
+
+ # Sort and format
+ user_list = [{'author': a, 'aws_cost': round(v['aws_cost'], 2), 'gcp_cost': round(v['gcp_cost'], 2),
+ 'total_cost': round(v['aws_cost'] + v['gcp_cost'], 2), 'runs': v['runs'],
+ 'by_date': {d: round(c, 2) for d, c in sorted(v['by_date'].items())}}
+ for a, v in sorted(by_user.items(), key=lambda x: -(x[1]['aws_cost'] + x[1]['gcp_cost']))]
+
+ branch_list = [{'branch': b, 'cost': round(v['cost'], 2), 'runs': v['runs'],
+ 'type': v['type'], 'author': v['author']}
+ for b, v in sorted(by_branch.items(), key=lambda x: -x[1]['cost'])[:100]]
+
+ type_list = [{'type': t, 'cost': round(v['cost'], 2), 'runs': v['runs']}
+ for t, v in sorted(by_type.items(), key=lambda x: -x[1]['cost'])]
+
+ instances.sort(key=lambda x: -(x['cost_usd'] or 0))
+
+ all_types = sorted(by_type.keys())
+ by_date_list = []
+ for date in sorted(by_date_type):
+ entry = {'date': date, 'total': 0, 'runs': 0}
+ for rt in all_types:
+ entry[rt] = round(by_date_type[date].get(rt, 0), 2)
+ entry['total'] += by_date_type[date].get(rt, 0)
+ entry['total'] = round(entry['total'], 2)
+ entry['runs'] = sum(1 for inst in instances if inst['date'] == date)
+ by_date_list.append(entry)
+
+ by_date_list = _aggregate_dates(by_date_list, granularity,
+ sum_fields=['total', 'runs'] + all_types)
+
+ total_aws = sum(u['aws_cost'] for u in user_list)
+
+ return _json({
+ 'by_user': user_list,
+ 'by_branch': branch_list,
+ 'by_type': type_list,
+ 'by_date': by_date_list,
+ 'run_types': all_types,
+ 'instances': instances[:500],
+ 'totals': {'aws': round(total_aws, 2), 'gcp': round(gcp_total, 2),
+ 'gcp_unattributed': round(gcp_total, 2),
+ 'combined': round(total_aws + gcp_total, 2)},
+ })
+
+
+@app.route('/api/costs/runners')
+@auth.login_required
+def api_costs_runners():
+ date_from = request.args.get('from', (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'))
+ date_to = request.args.get('to', datetime.now().strftime('%Y-%m-%d'))
+ granularity = request.args.get('granularity', 'daily')
+ dashboard = request.args.get('dashboard', '')
+ ts_from = int(datetime.strptime(date_from, '%Y-%m-%d').timestamp() * 1000)
+ ts_to = int((datetime.strptime(date_to, '%Y-%m-%d') + timedelta(days=1)).timestamp() * 1000)
+
+ runs = metrics.get_ci_runs(r, ts_from, ts_to)
+ runs_with_cost = [run for run in runs if run.get('cost_usd') is not None]
+ if dashboard:
+ runs_with_cost = [run for run in runs_with_cost if run.get('dashboard') == dashboard]
+
+ by_date_map = {}
+ for run in runs_with_cost:
+ date = metrics._ts_to_date(run.get('timestamp', 0))
+ if date not in by_date_map:
+ by_date_map[date] = {'spot_cost': 0, 'ondemand_cost': 0, 'total': 0}
+ cost = run['cost_usd']
+ if run.get('spot'):
+ by_date_map[date]['spot_cost'] += cost
+ else:
+ by_date_map[date]['ondemand_cost'] += cost
+ by_date_map[date]['total'] += cost
+
+ by_date = [{'date': date, 'spot_cost': round(d['spot_cost'], 2),
+ 'ondemand_cost': round(d['ondemand_cost'], 2), 'total': round(d['total'], 2),
+ 'spot_pct': round(100.0 * d['spot_cost'] / max(d['total'], 0.01), 1)}
+ for date, d in sorted(by_date_map.items())]
+
+ by_date = _aggregate_dates(by_date, granularity,
+ sum_fields=['spot_cost', 'ondemand_cost', 'total'])
+ for d in by_date:
+ d['spot_pct'] = round(100.0 * d['spot_cost'] / max(d['total'], 0.01), 1)
+
+ by_instance_map = {}
+ for run in runs_with_cost:
+ inst = run.get('instance_type', 'unknown')
+ if inst not in by_instance_map:
+ by_instance_map[inst] = {'cost': 0, 'runs': 0}
+ by_instance_map[inst]['cost'] += run['cost_usd']
+ by_instance_map[inst]['runs'] += 1
+ by_instance = [{'instance_type': k, 'cost': round(v['cost'], 2), 'runs': v['runs']}
+ for k, v in sorted(by_instance_map.items(), key=lambda x: -x[1]['cost'])]
+
+ by_dash_map = {}
+ for run in runs_with_cost:
+ dash = run.get('dashboard', 'unknown')
+ if dash not in by_dash_map:
+ by_dash_map[dash] = {'cost': 0, 'runs': 0}
+ by_dash_map[dash]['cost'] += run['cost_usd']
+ by_dash_map[dash]['runs'] += 1
+ by_dashboard = [{'dashboard': k, 'cost': round(v['cost'], 2), 'runs': v['runs']}
+ for k, v in sorted(by_dash_map.items(), key=lambda x: -x[1]['cost'])]
+
+ total_cost = sum(run['cost_usd'] for run in runs_with_cost)
+ spot_cost = sum(run['cost_usd'] for run in runs_with_cost if run.get('spot'))
+
+ return _json({
+ 'by_date': by_date,
+ 'by_instance_type': by_instance,
+ 'by_dashboard': by_dashboard,
+ 'summary': {
+ 'total_cost': round(total_cost, 2),
+ 'spot_pct': round(100.0 * spot_cost / max(total_cost, 0.01), 1),
+ 'avg_cost_per_run': round(total_cost / max(len(runs_with_cost), 1), 2),
+ 'total_runs': len(runs_with_cost),
+ },
+ })
+
+
+# ---- CI Performance ----
+
+@app.route('/api/ci/performance')
+@auth.login_required
+def api_ci_performance():
+ date_from = request.args.get('from', (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'))
+ date_to = request.args.get('to', datetime.now().strftime('%Y-%m-%d'))
+ dashboard = request.args.get('dashboard', '')
+ granularity = request.args.get('granularity', 'daily')
+ ts_from = int(datetime.strptime(date_from, '%Y-%m-%d').timestamp() * 1000)
+ ts_to = int((datetime.strptime(date_to, '%Y-%m-%d') + timedelta(days=1)).timestamp() * 1000)
+
+ runs = metrics.get_ci_runs(r, ts_from, ts_to)
+ runs = [run for run in runs if run.get('status') in ('PASSED', 'FAILED')]
+ if dashboard:
+ runs = [run for run in runs if run.get('dashboard') == dashboard]
+
+ by_date_map = {}
+ for run in runs:
+ date = metrics._ts_to_date(run.get('timestamp', 0))
+ if date not in by_date_map:
+ by_date_map[date] = {'total': 0, 'passed': 0, 'failed': 0, 'durations': []}
+ by_date_map[date]['total'] += 1
+ if run.get('status') == 'PASSED':
+ by_date_map[date]['passed'] += 1
+ else:
+ by_date_map[date]['failed'] += 1
+ complete = run.get('complete')
+ ts = run.get('timestamp')
+ if complete and ts:
+ by_date_map[date]['durations'].append((complete - ts) / 60000.0)
+
+ by_date = []
+ for date in sorted(by_date_map):
+ d = by_date_map[date]
+ by_date.append({
+ 'date': date,
+ 'total': d['total'],
+ 'passed': d['passed'],
+ 'failed': d['failed'],
+ 'pass_rate': round(100.0 * d['passed'] / max(d['total'], 1), 1),
+ 'failure_rate': round(100.0 * d['failed'] / max(d['total'], 1), 1),
+ 'avg_duration_mins': round(sum(d['durations']) / len(d['durations']), 1) if d['durations'] else None,
+ })
+
+ by_date = _aggregate_dates(by_date, granularity,
+ sum_fields=['total', 'passed', 'failed'],
+ avg_fields=['avg_duration_mins'])
+ for d in by_date:
+ d['pass_rate'] = round(100.0 * d['passed'] / max(d['total'], 1), 1)
+ d['failure_rate'] = round(100.0 * d['failed'] / max(d['total'], 1), 1)
+
+ # Daily flake/failure counts from test_events
+ if dashboard:
+ flake_daily = db.query('''
+ SELECT substr(timestamp, 1, 10) as date, COUNT(*) as count
+ FROM test_events WHERE status = 'flaked' AND dashboard = ?
+ AND timestamp >= ? AND timestamp < ?
+ GROUP BY substr(timestamp, 1, 10)
+ ''', (dashboard, date_from, date_to + 'T23:59:59'))
+ fail_test_daily = db.query('''
+ SELECT substr(timestamp, 1, 10) as date, COUNT(*) as count
+ FROM test_events WHERE status = 'failed' AND dashboard = ?
+ AND timestamp >= ? AND timestamp < ?
+ GROUP BY substr(timestamp, 1, 10)
+ ''', (dashboard, date_from, date_to + 'T23:59:59'))
+ else:
+ flake_daily = db.query('''
+ SELECT substr(timestamp, 1, 10) as date, COUNT(*) as count
+ FROM test_events WHERE status = 'flaked'
+ AND timestamp >= ? AND timestamp < ?
+ GROUP BY substr(timestamp, 1, 10)
+ ''', (date_from, date_to + 'T23:59:59'))
+ fail_test_daily = db.query('''
+ SELECT substr(timestamp, 1, 10) as date, COUNT(*) as count
+ FROM test_events WHERE status = 'failed'
+ AND timestamp >= ? AND timestamp < ?
+ GROUP BY substr(timestamp, 1, 10)
+ ''', (date_from, date_to + 'T23:59:59'))
+ flake_daily_map = {r['date']: r['count'] for r in flake_daily}
+ fail_test_daily_map = {r['date']: r['count'] for r in fail_test_daily}
+ for d in by_date:
+ d['flake_count'] = flake_daily_map.get(d['date'], 0)
+ d['test_failure_count'] = fail_test_daily_map.get(d['date'], 0)
+
+ # Top flakes/failures
+ if dashboard:
+ top_flakes = db.query('''
+ SELECT test_cmd, COUNT(*) as count, ref_name
+ FROM test_events WHERE status='flaked' AND dashboard = ?
+ AND timestamp >= ? AND timestamp <= ?
+ GROUP BY test_cmd ORDER BY count DESC LIMIT 15
+ ''', (dashboard, date_from, date_to + 'T23:59:59'))
+ top_failures = db.query('''
+ SELECT test_cmd, COUNT(*) as count
+ FROM test_events WHERE status='failed' AND dashboard = ?
+ AND timestamp >= ? AND timestamp <= ?
+ GROUP BY test_cmd ORDER BY count DESC LIMIT 15
+ ''', (dashboard, date_from, date_to + 'T23:59:59'))
+ else:
+ top_flakes = db.query('''
+ SELECT test_cmd, COUNT(*) as count, ref_name
+ FROM test_events WHERE status='flaked' AND timestamp >= ? AND timestamp <= ?
+ GROUP BY test_cmd ORDER BY count DESC LIMIT 15
+ ''', (date_from, date_to + 'T23:59:59'))
+ top_failures = db.query('''
+ SELECT test_cmd, COUNT(*) as count
+ FROM test_events WHERE status='failed' AND timestamp >= ? AND timestamp <= ?
+ GROUP BY test_cmd ORDER BY count DESC LIMIT 15
+ ''', (date_from, date_to + 'T23:59:59'))
+
+ # Summary
+ total = len(runs)
+ passed = sum(1 for run in runs if run.get('status') == 'PASSED')
+ failed = total - passed
+ durations = []
+ for run in runs:
+ complete = run.get('complete')
+ ts = run.get('timestamp')
+ if complete and ts:
+ durations.append((complete - ts) / 60000.0)
+
+ if dashboard:
+ flake_count = db.query('''
+ SELECT COUNT(*) as c FROM test_events WHERE status='flaked' AND dashboard = ?
+ AND timestamp >= ? AND timestamp <= ?
+ ''', (dashboard, date_from, date_to + 'T23:59:59'))
+ total_tests = db.query('''
+ SELECT COUNT(*) as c FROM test_events WHERE status IN ('failed','flaked') AND dashboard = ?
+ AND timestamp >= ? AND timestamp <= ?
+ ''', (dashboard, date_from, date_to + 'T23:59:59'))
+ total_failures_count = db.query('''
+ SELECT COUNT(*) as c FROM test_events WHERE status='failed' AND dashboard = ?
+ AND timestamp >= ? AND timestamp <= ?
+ ''', (dashboard, date_from, date_to + 'T23:59:59'))
+ else:
+ flake_count = db.query('''
+ SELECT COUNT(*) as c FROM test_events WHERE status='flaked' AND timestamp >= ? AND timestamp <= ?
+ ''', (date_from, date_to + 'T23:59:59'))
+ total_tests = db.query('''
+ SELECT COUNT(*) as c FROM test_events WHERE status IN ('failed','flaked') AND timestamp >= ? AND timestamp <= ?
+ ''', (date_from, date_to + 'T23:59:59'))
+ total_failures_count = db.query('''
+ SELECT COUNT(*) as c FROM test_events WHERE status='failed' AND timestamp >= ? AND timestamp <= ?
+ ''', (date_from, date_to + 'T23:59:59'))
+
+ fc = flake_count[0]['c'] if flake_count else 0
+ tc = total_tests[0]['c'] if total_tests else 0
+ tfc = total_failures_count[0]['c'] if total_failures_count else 0
+
+ return _json({
+ 'by_date': by_date,
+ 'top_flakes': top_flakes,
+ 'top_failures': top_failures,
+ 'summary': {
+ 'total_runs': total,
+ 'pass_rate': round(100.0 * passed / max(total, 1), 1),
+ 'failure_rate': round(100.0 * failed / max(total, 1), 1),
+ 'avg_duration_mins': round(sum(durations) / len(durations), 1) if durations else None,
+ 'flake_rate': round(100.0 * fc / max(tc, 1), 1) if tc else 0,
+ 'total_flakes': fc,
+ 'total_test_failures': tfc,
+ },
+ })
+
+
+# ---- GitHub integration ----
+
+@app.route('/api/deployments/speed')
+@auth.login_required
+def api_deploy_speed():
+ date_from = request.args.get('from', (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'))
+ date_to = request.args.get('to', datetime.now().strftime('%Y-%m-%d'))
+ workflow = request.args.get('workflow', '')
+ granularity = request.args.get('granularity', 'daily')
+ result = github_data.get_deployment_speed(date_from, date_to, workflow)
+ if granularity != 'daily' and result.get('by_date'):
+ result['by_date'] = _aggregate_dates(
+ result['by_date'], granularity,
+ sum_fields=['count', 'success', 'failure'],
+ avg_fields=['median_mins', 'p95_mins'])
+ return _json(result)
+
+
+@app.route('/api/branches/lag')
+@auth.login_required
+def api_branch_lag():
+ date_from = request.args.get('from', (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'))
+ date_to = request.args.get('to', datetime.now().strftime('%Y-%m-%d'))
+ return _json(github_data.get_branch_lag(date_from, date_to))
+
+
+@app.route('/api/prs/metrics')
+@auth.login_required
+def api_pr_metrics():
+ date_from = request.args.get('from', (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'))
+ date_to = request.args.get('to', datetime.now().strftime('%Y-%m-%d'))
+ author = request.args.get('author', '')
+ ts_from = int(datetime.strptime(date_from, '%Y-%m-%d').timestamp() * 1000)
+ ts_to = int((datetime.strptime(date_to, '%Y-%m-%d') + timedelta(days=1)).timestamp() * 1000)
+ ci_runs = metrics.get_ci_runs(r, ts_from, ts_to)
+ return _json(github_data.get_pr_metrics(date_from, date_to, author, ci_runs))
+
+
+@app.route('/api/merge-queue/stats')
+@auth.login_required
+def api_merge_queue_stats():
+ date_from = request.args.get('from', (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'))
+ date_to = request.args.get('to', datetime.now().strftime('%Y-%m-%d'))
+ return _json(github_data.get_merge_queue_stats(date_from, date_to))
+
+
+@app.route('/api/ci/flakes-by-command')
+@auth.login_required
+def api_flakes_by_command():
+ date_from = request.args.get('from', (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'))
+ date_to = request.args.get('to', datetime.now().strftime('%Y-%m-%d'))
+ dashboard = request.args.get('dashboard', '')
+ metrics.sync_failed_tests_to_sqlite(r)
+ return _json(metrics.get_flakes_by_command(date_from, date_to, dashboard))
+
+
+# ---- Test timings ----
+
+@app.route('/api/tests/timings')
+@auth.login_required
+def api_test_timings():
+ """Test timing statistics: duration by test command, with trends."""
+ date_from = request.args.get('from', (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'))
+ date_to = request.args.get('to', datetime.now().strftime('%Y-%m-%d'))
+ dashboard = request.args.get('dashboard', '')
+ status = request.args.get('status', '') # filter to specific status
+ test_cmd = request.args.get('test_cmd', '') # filter to specific test
+
+ conditions = ['duration_secs IS NOT NULL', 'duration_secs > 0',
+ 'timestamp >= ?', "timestamp < ? || 'T23:59:59'"]
+ params = [date_from, date_to]
+
+ if dashboard:
+ conditions.append('dashboard = ?')
+ params.append(dashboard)
+ if status:
+ conditions.append('status = ?')
+ params.append(status)
+ if test_cmd:
+ conditions.append('test_cmd = ?')
+ params.append(test_cmd)
+
+ where = 'WHERE ' + ' AND '.join(conditions)
+
+ # Per-test stats
+ by_test = db.query(f'''
+ SELECT test_cmd,
+ COUNT(*) as count,
+ ROUND(AVG(duration_secs), 1) as avg_secs,
+ ROUND(MIN(duration_secs), 1) as min_secs,
+ ROUND(MAX(duration_secs), 1) as max_secs,
+ SUM(CASE WHEN status = 'passed' THEN 1 ELSE 0 END) as passed,
+ SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed,
+ SUM(CASE WHEN status = 'flaked' THEN 1 ELSE 0 END) as flaked,
+ dashboard
+ FROM test_events {where}
+ GROUP BY test_cmd
+ ORDER BY count DESC
+ LIMIT 200
+ ''', params)
+
+ # Add pass rate
+ for row in by_test:
+ total = row['passed'] + row['failed'] + row['flaked']
+ row['pass_rate'] = round(100.0 * row['passed'] / max(total, 1), 1)
+ row['total_time_secs'] = round(row['avg_secs'] * row['count'], 0)
+
+ # Daily time series (aggregate across all tests or filtered test)
+ by_date = db.query(f'''
+ SELECT substr(timestamp, 1, 10) as date,
+ COUNT(*) as count,
+ ROUND(AVG(duration_secs), 1) as avg_secs,
+ ROUND(MAX(duration_secs), 1) as max_secs,
+ SUM(CASE WHEN status = 'passed' THEN 1 ELSE 0 END) as passed,
+ SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed,
+ SUM(CASE WHEN status = 'flaked' THEN 1 ELSE 0 END) as flaked
+ FROM test_events {where}
+ GROUP BY substr(timestamp, 1, 10)
+ ORDER BY date
+ ''', params)
+
+ # Summary
+ summary_rows = db.query(f'''
+ SELECT COUNT(*) as count,
+ ROUND(AVG(duration_secs), 1) as avg_secs,
+ ROUND(MAX(duration_secs), 1) as max_secs,
+ SUM(duration_secs) as total_secs,
+ SUM(CASE WHEN status = 'passed' THEN 1 ELSE 0 END) as passed,
+ SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed,
+ SUM(CASE WHEN status = 'flaked' THEN 1 ELSE 0 END) as flaked
+ FROM test_events {where}
+ ''', params)
+ s = summary_rows[0] if summary_rows else {}
+
+ # Slowest individual test runs
+ slowest = db.query(f'''
+ SELECT test_cmd, status, duration_secs, dashboard,
+ substr(timestamp, 1, 10) as date, commit_author, log_url
+ FROM test_events {where}
+ ORDER BY duration_secs DESC
+ LIMIT 50
+ ''', params)
+
+ return _json({
+ 'by_test': by_test,
+ 'by_date': by_date,
+ 'slowest': slowest,
+ 'summary': {
+ 'total_runs': s.get('count', 0),
+ 'avg_duration_secs': s.get('avg_secs'),
+ 'max_duration_secs': s.get('max_secs'),
+ 'total_compute_secs': round(s.get('total_secs', 0) or 0, 0),
+ 'passed': s.get('passed', 0),
+ 'failed': s.get('failed', 0),
+ 'flaked': s.get('flaked', 0),
+ },
+ })
+
+
+# ---- Dashboard views ----
+
+@app.route('/ci-health')
+@auth.login_required
+def ci_health():
+ return redirect('/ci-insights')
+
+
+@app.route('/ci-insights')
+@auth.login_required
+def ci_insights():
+ path = Path(__file__).parent / 'views' / 'ci-insights.html'
+ if path.exists():
+ return path.read_text()
+ return "Dashboard not found", 404
+
+
+@app.route('/cost-overview')
+@auth.login_required
+def cost_overview():
+ path = Path(__file__).parent / 'views' / 'cost-overview.html'
+ if path.exists():
+ return path.read_text()
+ return "Dashboard not found", 404
+
+
+@app.route('/test-timings')
+@auth.login_required
+def test_timings():
+ path = Path(__file__).parent / 'views' / 'test-timings.html'
+ if path.exists():
+ return path.read_text()
+ return "Dashboard not found", 404
+
+
+if __name__ == '__main__':
+ app.run(host='0.0.0.0', port=8081)
diff --git a/ci3/ci-metrics/billing/__init__.py b/ci3/ci-metrics/billing/__init__.py
new file mode 100644
index 000000000000..e097751047c2
--- /dev/null
+++ b/ci3/ci-metrics/billing/__init__.py
@@ -0,0 +1,14 @@
+"""Billing package: GKE namespace billing and AWS cost data."""
+
+from billing.gcp import (
+ get_billing_files_in_range,
+ aggregate_billing_weekly,
+ aggregate_billing_monthly,
+ serve_billing_dashboard,
+)
+from billing.aws import (
+ get_costs_overview,
+ get_aws_cost_details,
+ decode_branch_info,
+ decode_instance_name,
+)
diff --git a/ci3/ci-metrics/billing/aws.py b/ci3/ci-metrics/billing/aws.py
new file mode 100644
index 000000000000..481393d74ec3
--- /dev/null
+++ b/ci3/ci-metrics/billing/aws.py
@@ -0,0 +1,347 @@
+"""AWS Cost Explorer fetch with in-memory cache.
+
+Fetches on first request, caches for 6 hours. Refreshes via a short-lived daemon thread after the first blocking load. No SQLite.
+"""
+import threading
+import time
+from datetime import datetime, timedelta, timezone
+
+SERVICE_CATEGORY_MAP = {
+ # Compute
+ 'Amazon Elastic Compute Cloud - Compute': 'ec2',
+ 'EC2 - Other': 'ec2', # EBS volumes, snapshots, NAT gateways, data transfer
+ 'Amazon Elastic Container Service': 'ecs',
+ 'Amazon Elastic Kubernetes Service': 'eks',
+ 'Amazon EC2 Container Registry (ECR)': 'ecr',
+ 'AWS Lambda': 'lambda',
+ 'Amazon Lightsail': 'lightsail',
+ # Storage
+ 'Amazon Simple Storage Service': 's3',
+ 'Amazon Elastic File System': 'efs',
+ 'Amazon Elastic Block Store': 'ebs',
+ 'Amazon ElastiCache': 'elasticache',
+ 'Amazon Relational Database Service': 'rds',
+ 'Amazon DynamoDB': 'dynamodb',
+ 'AWS Backup': 'backup',
+ # Networking
+ 'Amazon CloudFront': 'cloudfront',
+ 'CloudFront Flat-Rate Plans': 'cloudfront',
+ 'Amazon Virtual Private Cloud': 'vpc',
+ 'Elastic Load Balancing': 'elb',
+ 'Amazon Elastic Load Balancing': 'elb',
+ 'Amazon Route 53': 'route53',
+ 'Amazon API Gateway': 'apigateway',
+ 'AWS Data Transfer': 'data_transfer',
+ 'AWS Global Accelerator': 'global_accelerator',
+ # Monitoring & Security
+ 'AmazonCloudWatch': 'cloudwatch',
+ 'AWS CloudTrail': 'cloudtrail',
+ 'AWS Secrets Manager': 'secrets',
+ 'AWS Key Management Service': 'kms',
+ 'AWS WAF': 'waf',
+ 'AWS Config': 'config',
+ 'AWS Certificate Manager': 'acm',
+ # CI/CD & Dev Tools
+ 'AWS CodeBuild': 'codebuild',
+ 'AWS CodePipeline': 'codepipeline',
+ 'AWS CloudFormation': 'cloudformation',
+ 'AWS Amplify': 'amplify',
+ # Data & Analytics
+ 'AWS Glue': 'glue',
+ # IoT
+ 'AWS IoT': 'iot',
+ 'Amazon Location Service': 'location',
+ # Messaging
+ 'Amazon Simple Notification Service': 'sns',
+ 'Amazon Simple Queue Service': 'sqs',
+ # Other
+ 'Tax': 'tax',
+ 'AWS Support (Business)': 'support',
+ 'AWS Support (Enterprise)': 'support',
+ 'AWS Cost Explorer': 'cost_explorer',
+}
+
+import re
+
+_cache = {'rows': [], 'ts': 0}
+_cache_lock = threading.Lock()
+_detail_cache = {'rows': [], 'ts': 0}
+_detail_cache_lock = threading.Lock()
+_CACHE_TTL = 6 * 3600
+
+# Known job postfixes from ci.sh (these become INSTANCE_POSTFIX)
+_JOB_POSTFIXES = re.compile(
+ r'_(x[0-9]+-(?:full|fast)|a[0-9]+-(?:full|fast)|n-deploy-[0-9]+|grind-test-[a-f0-9]+)$'
+)
+_ARCH_SUFFIXES = ('_amd64', '_arm64', '_x86_64', '_aarch64')
+
+
+def decode_instance_name(run: dict) -> str:
+ """Reconstruct the EC2 instance name from CI run metadata.
+
+ bootstrap_ec2 naming:
+ merge queue: pr-{number}_{arch}[_{postfix}]
+ branch: {sanitized_branch}_{arch}[_{postfix}]
+ """
+ name = run.get('name', '')
+ pr = run.get('pr_number')
+ arch = run.get('arch', 'amd64')
+ # Normalize arch names
+ if arch in ('x86_64', 'amd64'):
+ arch = 'amd64'
+ elif arch in ('aarch64', 'arm64'):
+ arch = 'arm64'
+ job = run.get('job_id', '')
+
+ if '(queue)' in name and pr:
+ base = f'pr-{pr}_{arch}'
+ elif pr:
+ base = f'pr-{pr}_{arch}'
+ else:
+ # Replicate: echo -n "$REF_NAME" | head -c 50 | tr -c 'a-zA-Z0-9-' '_'
+ sanitized = re.sub(r'[^a-zA-Z0-9-]', '_', name[:50])
+ base = f'{sanitized}_{arch}'
+ if job:
+ return f'{base}_{job}'
+ return base
+
+
+def decode_branch_info(run: dict) -> dict:
+ """Extract branch/PR/user context from a CI run."""
+ name = run.get('name', '')
+ dashboard = run.get('dashboard', '')
+ pr = run.get('pr_number')
+ author = run.get('author', 'unknown')
+
+ if '(queue)' in name or dashboard == 'next':
+ run_type = 'merge-queue'
+ branch = name.replace(' (queue)', '')
+ elif dashboard == 'prs':
+ run_type = 'pr'
+ branch = name
+ elif dashboard in ('nightly', 'releases', 'network', 'deflake'):
+ run_type = dashboard
+ branch = name
+ else:
+ run_type = 'other'
+ branch = name
+
+ return {
+ 'type': run_type,
+ 'branch': branch,
+ 'pr_number': pr,
+ 'author': author,
+ 'instance_name': decode_instance_name(run),
+ }
+
+
+def _fetch_aws_costs(date_from: str, date_to: str) -> list[dict]:
+ try:
+ import boto3
+ except ImportError:
+ print("[rk_aws_costs] boto3 not installed, skipping")
+ return []
+
+ try:
+ client = boto3.client('ce', region_name='us-east-2')
+ rows = []
+ next_token = None
+
+ while True:
+ kwargs = dict(
+ TimePeriod={'Start': date_from, 'End': date_to},
+ Granularity='DAILY',
+ Metrics=['UnblendedCost'],
+ GroupBy=[{'Type': 'DIMENSION', 'Key': 'SERVICE'}],
+ )
+ if next_token:
+ kwargs['NextPageToken'] = next_token
+
+ response = client.get_cost_and_usage(**kwargs)
+
+ for result in response['ResultsByTime']:
+ date = result['TimePeriod']['Start']
+ for group in result['Groups']:
+ service = group['Keys'][0]
+ amount = float(group['Metrics']['UnblendedCost']['Amount'])
+ if amount == 0:
+ continue
+ category = SERVICE_CATEGORY_MAP.get(service, 'other')
+ if category == 'other':
+ print(f"[rk_aws_costs] unmapped service: {service!r} (${amount:.2f})")
+ rows.append({
+ 'date': date,
+ 'service': service,
+ 'category': category,
+ 'amount_usd': round(amount, 4),
+ })
+
+ next_token = response.get('NextPageToken')
+ if not next_token:
+ break
+
+ return rows
+ except Exception as e:
+ print(f"[rk_aws_costs] Error: {e}")
+ return []
+
+
+def _ensure_cached():
+ now = time.time()
+ if _cache['rows'] and now - _cache['ts'] < _CACHE_TTL:
+ return
+ if not _cache_lock.acquire(blocking=False):
+ return
+ try:
+ today = datetime.now(timezone.utc).date()
+ rows = _fetch_aws_costs(
+ (today - timedelta(days=365)).isoformat(),
+ today.isoformat(),
+ )
+ if rows:
+ _cache['rows'] = rows
+ _cache['ts'] = now
+ finally:
+ _cache_lock.release()
+
+
+def get_aws_costs(date_from: str, date_to: str) -> list[dict]:
+ """Get AWS costs for date range. Blocks on first fetch, async refresh after."""
+ if not _cache['rows']:
+ _ensure_cached() # block on first load so dashboard isn't empty
+ else:
+ threading.Thread(target=_ensure_cached, daemon=True).start()
+ return [r for r in _cache['rows'] if date_from <= r['date'] <= date_to]
+
+
+def _fetch_aws_cost_details(date_from: str, date_to: str) -> list[dict]:
+ """Fetch per-resource (USAGE_TYPE) cost breakdown from AWS Cost Explorer."""
+ try:
+ import boto3
+ except ImportError:
+ return []
+
+ try:
+ client = boto3.client('ce', region_name='us-east-2')
+ rows = []
+ next_token = None
+
+ while True:
+ kwargs = dict(
+ TimePeriod={'Start': date_from, 'End': date_to},
+ Granularity='DAILY',
+ Metrics=['UnblendedCost'],
+ GroupBy=[
+ {'Type': 'DIMENSION', 'Key': 'SERVICE'},
+ {'Type': 'DIMENSION', 'Key': 'USAGE_TYPE'},
+ ],
+ )
+ if next_token:
+ kwargs['NextPageToken'] = next_token
+
+ response = client.get_cost_and_usage(**kwargs)
+
+ for result in response['ResultsByTime']:
+ date = result['TimePeriod']['Start']
+ for group in result['Groups']:
+ service = group['Keys'][0]
+ usage_type = group['Keys'][1]
+ amount = float(group['Metrics']['UnblendedCost']['Amount'])
+ if amount == 0:
+ continue
+ category = SERVICE_CATEGORY_MAP.get(service, 'other')
+ rows.append({
+ 'date': date,
+ 'service': service,
+ 'usage_type': usage_type,
+ 'category': category,
+ 'amount_usd': round(amount, 4),
+ })
+
+ next_token = response.get('NextPageToken')
+ if not next_token:
+ break
+
+ return rows
+ except Exception as e:
+ print(f"[rk_aws_costs] Detail fetch error: {e}")
+ return []
+
+
+def _ensure_detail_cached():
+ now = time.time()
+ if _detail_cache['rows'] and now - _detail_cache['ts'] < _CACHE_TTL:
+ return
+ if not _detail_cache_lock.acquire(blocking=False):
+ return
+ try:
+ today = datetime.now(timezone.utc).date()
+ rows = _fetch_aws_cost_details(
+ (today - timedelta(days=365)).isoformat(),
+ today.isoformat(),
+ )
+ if rows:
+ _detail_cache['rows'] = rows
+ _detail_cache['ts'] = now
+ finally:
+ _detail_cache_lock.release()
+
+
+def get_aws_cost_details(date_from: str, date_to: str) -> list[dict]:
+ """Get per-resource AWS cost details. Blocks on first fetch, async refresh after."""
+ if not _detail_cache['rows']:
+ _ensure_detail_cached()
+ else:
+ threading.Thread(target=_ensure_detail_cached, daemon=True).start()
+ return [r for r in _detail_cache['rows'] if date_from <= r['date'] <= date_to]
+
+
+def get_costs_overview(date_from: str, date_to: str) -> dict:
+ """Combined AWS + GCP cost overview. GCP data comes from billing JSON files."""
+ aws_rows = get_aws_costs(date_from, date_to)
+
+ # GCP data from billing files (already on disk, no SQLite needed)
+ gcp_by_date = {}
+ try:
+ from billing.gcp import get_billing_files_in_range
+ billing_data = get_billing_files_in_range(
+ datetime.strptime(date_from, '%Y-%m-%d'),
+ datetime.strptime(date_to, '%Y-%m-%d'),
+ )
+ for entry in billing_data:
+ d = entry['date']
+ if d not in gcp_by_date:
+ gcp_by_date[d] = {}
+ for ns_data in entry.get('namespaces', {}).values():
+ for cat, amt in ns_data.get('breakdown', {}).items():
+ gcp_by_date[d][cat] = gcp_by_date[d].get(cat, 0) + amt
+ except Exception as e:
+ print(f"[rk_aws_costs] GCP billing read failed: {e}")
+
+ by_date = {}
+ for r in aws_rows:
+ d = r['date']
+ if d not in by_date:
+ by_date[d] = {'date': d, 'aws': {}, 'gcp': {}, 'aws_total': 0, 'gcp_total': 0}
+ cat = r['category']
+ by_date[d]['aws'][cat] = by_date[d]['aws'].get(cat, 0) + r['amount_usd']
+ by_date[d]['aws_total'] += r['amount_usd']
+
+ for d, cats in gcp_by_date.items():
+ if d not in by_date:
+ by_date[d] = {'date': d, 'aws': {}, 'gcp': {}, 'aws_total': 0, 'gcp_total': 0}
+ by_date[d]['gcp'] = cats
+ by_date[d]['gcp_total'] = sum(cats.values())
+
+ sorted_dates = sorted(by_date.values(), key=lambda x: x['date'])
+ aws_total = sum(d['aws_total'] for d in sorted_dates)
+ gcp_total = sum(d['gcp_total'] for d in sorted_dates)
+
+ return {
+ 'by_date': sorted_dates,
+ 'totals': {
+ 'aws': round(aws_total, 2),
+ 'gcp': round(gcp_total, 2),
+ 'combined': round(aws_total + gcp_total, 2),
+ }
+ }
diff --git a/ci3/ci-metrics/billing/billing-dashboard.html b/ci3/ci-metrics/billing/billing-dashboard.html
new file mode 100644
index 000000000000..87193ffae207
--- /dev/null
+++ b/ci3/ci-metrics/billing/billing-dashboard.html
@@ -0,0 +1,415 @@
+
+
+
+
+ ACI - Namespace Billing
+
+
+
+
+ namespace billing
+
+
+
+
+
+ |
+
+
+ |
+
+
+
+ |
+
+
+
+
+
+ |
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ci3/ci-metrics/billing/explore.py b/ci3/ci-metrics/billing/explore.py
new file mode 100644
index 000000000000..c591d8c847ef
--- /dev/null
+++ b/ci3/ci-metrics/billing/explore.py
@@ -0,0 +1,619 @@
+#!/usr/bin/env python3
+"""CLI tool to explore GCP billing data from the Cloud Billing BigQuery export.
+
+Queries the actual billing export tables (not usage metering) to get real
+invoice-level costs. Caches results in SQLite for fast re-queries.
+
+Usage:
+ python billing_explore.py discover # find billing export tables
+ python explore.py discover # find billing export tables
+ python explore.py fetch [--months N] # fetch & cache billing data
+ python explore.py monthly # show monthly totals
+ python explore.py monthly --by service # monthly by service
+ python explore.py monthly --by sku # monthly by SKU
+ python explore.py monthly --by project # monthly by project
+ python explore.py daily [--month 2024-12] # daily for a month
+ python explore.py top [--month 2024-12] # top costs for a month
+"""
+import argparse
+import os
+import sqlite3
+import sys
+from datetime import datetime, timedelta, timezone
+
+DB_PATH = os.path.join(os.getenv('LOGS_DISK_PATH', '/tmp'), 'billing_explore.db')
+
+SCHEMA = """
+CREATE TABLE IF NOT EXISTS gcp_billing (
+ date TEXT NOT NULL,
+ project_id TEXT NOT NULL DEFAULT '',
+ service TEXT NOT NULL DEFAULT '',
+ sku TEXT NOT NULL DEFAULT '',
+ cost REAL NOT NULL DEFAULT 0,
+ credits REAL NOT NULL DEFAULT 0,
+ usage_amount REAL NOT NULL DEFAULT 0,
+ usage_unit TEXT NOT NULL DEFAULT '',
+ currency TEXT NOT NULL DEFAULT 'USD',
+ fetched_at TEXT NOT NULL,
+ PRIMARY KEY (date, project_id, service, sku)
+);
+CREATE INDEX IF NOT EXISTS idx_gcp_billing_date ON gcp_billing(date);
+CREATE INDEX IF NOT EXISTS idx_gcp_billing_service ON gcp_billing(service);
+
+CREATE TABLE IF NOT EXISTS gcp_billing_meta (
+ key TEXT PRIMARY KEY,
+ value TEXT
+);
+"""
+
+
+def get_db():
+ os.makedirs(os.path.dirname(DB_PATH) or '.', exist_ok=True)
+ conn = sqlite3.connect(DB_PATH)
+ conn.execute('PRAGMA busy_timeout = 5000')
+ conn.row_factory = sqlite3.Row
+ conn.executescript(SCHEMA)
+ return conn
+
+
+def fmt_usd(v):
+ if v >= 1000:
+ return f'${v:,.0f}'
+ if v >= 1:
+ return f'${v:,.2f}'
+ return f'${v:,.4f}'
+
+
+# ---- BigQuery Discovery ----
+
+def cmd_discover(args):
+ """Find billing export tables in the project."""
+ from google.cloud import bigquery
+ project = args.project
+ client = bigquery.Client(project=project)
+
+ print(f'Listing datasets in project: {project}')
+ datasets = list(client.list_datasets())
+ if not datasets:
+ print(' No datasets found.')
+ return
+
+ for ds in datasets:
+ ds_id = ds.dataset_id
+ tables = list(client.list_tables(ds.reference))
+ billing_tables = [t for t in tables if 'billing' in t.table_id.lower() or 'cost' in t.table_id.lower()]
+ if billing_tables:
+ print(f'\n Dataset: {ds_id}')
+ for t in billing_tables:
+ full = f'{project}.{ds_id}.{t.table_id}'
+ print(f' {full}')
+ # Show schema for first billing table
+ tbl = client.get_table(t.reference)
+ print(f' rows: {tbl.num_rows}, size: {tbl.num_bytes / 1e6:.1f} MB')
+ print(f' columns: {", ".join(f.name for f in tbl.schema[:15])}')
+ else:
+ # Check for usage metering tables too
+ usage_tables = [t for t in tables if 'gke_cluster' in t.table_id.lower()]
+ if usage_tables:
+ print(f'\n Dataset: {ds_id} (usage metering)')
+ for t in usage_tables:
+ print(f' {project}.{ds_id}.{t.table_id}')
+
+ # Also try common billing export naming patterns
+ print(f'\n Trying common billing export table patterns...')
+ for ds in datasets:
+ for t in client.list_tables(ds.reference):
+ if t.table_id.startswith('gcp_billing_export'):
+ full = f'{project}.{ds.dataset_id}.{t.table_id}'
+ print(f' FOUND: {full}')
+
+
+# ---- BigQuery Fetch ----
+
+def cmd_fetch(args):
+ """Fetch billing data from BigQuery and cache in SQLite."""
+ from google.cloud import bigquery
+
+ table = args.table
+ project = args.project
+ months = args.months
+
+ if not table:
+ print('ERROR: --table is required. Run "discover" first to find the billing export table.')
+ print(' e.g. --table project.dataset.gcp_billing_export_resource_v1_XXXXXX')
+ sys.exit(1)
+
+ client = bigquery.Client(project=project)
+ end_date = datetime.now(timezone.utc).date()
+ start_date = end_date - timedelta(days=months * 31)
+
+ print(f'Fetching billing data from {start_date} to {end_date}')
+ print(f'Table: {table}')
+
+ # Query the billing export table
+ # The standard billing export has: billing_account_id, service.description,
+ # sku.description, usage_start_time, project.id, cost, credits, usage.amount, usage.unit
+ query = f"""
+ SELECT
+ DATE(usage_start_time) AS date,
+ COALESCE(project.id, '') AS project_id,
+ COALESCE(service.description, '') AS service,
+ COALESCE(sku.description, '') AS sku,
+ SUM(cost) AS cost,
+ SUM(IFNULL((SELECT SUM(c.amount) FROM UNNEST(credits) c), 0)) AS credits,
+ SUM(usage.amount) AS usage_amount,
+ MAX(usage.unit) AS usage_unit
+ FROM `{table}`
+ WHERE DATE(usage_start_time) BETWEEN @start_date AND @end_date
+ GROUP BY date, project_id, service, sku
+ HAVING ABS(cost) > 0.0001 OR ABS(credits) > 0.0001
+ ORDER BY date, service, sku
+ """
+
+ job_config = bigquery.QueryJobConfig(
+ query_parameters=[
+ bigquery.ScalarQueryParameter('start_date', 'DATE', start_date.isoformat()),
+ bigquery.ScalarQueryParameter('end_date', 'DATE', end_date.isoformat()),
+ ]
+ )
+
+ print('Running query...')
+ result = list(client.query(query, job_config=job_config).result())
+ print(f'Got {len(result)} rows')
+
+ if not result:
+ print('No data returned. Check table name and date range.')
+ return
+
+ # Store in SQLite
+ db = get_db()
+ now = datetime.now(timezone.utc).isoformat()
+
+ db.execute('DELETE FROM gcp_billing WHERE date >= ? AND date <= ?',
+ (start_date.isoformat(), end_date.isoformat()))
+
+ for row in result:
+ db.execute('''
+ INSERT OR REPLACE INTO gcp_billing
+ (date, project_id, service, sku, cost, credits, usage_amount, usage_unit, fetched_at)
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+ ''', (
+ row.date.isoformat() if hasattr(row.date, 'isoformat') else str(row.date),
+ row.project_id or '',
+ row.service or '',
+ row.sku or '',
+ float(row.cost or 0),
+ float(row.credits or 0),
+ float(row.usage_amount or 0),
+ row.usage_unit or '',
+ now,
+ ))
+
+ db.commit()
+ db.execute("INSERT OR REPLACE INTO gcp_billing_meta VALUES ('last_fetch', ?)", (now,))
+ db.execute("INSERT OR REPLACE INTO gcp_billing_meta VALUES ('table', ?)", (table,))
+ db.commit()
+
+ print(f'Cached {len(result)} rows in {DB_PATH}')
+
+ # Show quick summary
+ rows = db.execute('''
+ SELECT substr(date, 1, 7) as month, SUM(cost) as cost, SUM(credits) as credits
+ FROM gcp_billing GROUP BY month ORDER BY month
+ ''').fetchall()
+ print(f'\n{"Month":<10} {"Gross":>12} {"Credits":>12} {"Net":>12}')
+ print('-' * 48)
+ for r in rows:
+ net = r['cost'] + r['credits']
+ print(f'{r["month"]:<10} {fmt_usd(r["cost"]):>12} {fmt_usd(r["credits"]):>12} {fmt_usd(net):>12}')
+
+
+# ---- Reports ----
+
+def cmd_monthly(args):
+ """Show monthly totals."""
+ db = get_db()
+ group_by = args.by
+
+ if group_by == 'service':
+ rows = db.execute('''
+ SELECT substr(date, 1, 7) as month, service,
+ SUM(cost) as cost, SUM(credits) as credits
+ FROM gcp_billing GROUP BY month, service ORDER BY month, cost DESC
+ ''').fetchall()
+
+ current_month = None
+ for r in rows:
+ if r['month'] != current_month:
+ current_month = r['month']
+ month_total = sum(row['cost'] + row['credits'] for row in rows if row['month'] == current_month)
+ print(f'\n {current_month} (net: {fmt_usd(month_total)})')
+ print(f' {"Service":<45} {"Gross":>10} {"Credits":>10} {"Net":>10}')
+ print(' ' + '-' * 77)
+ net = r['cost'] + r['credits']
+ if abs(net) >= 0.01:
+ print(f' {r["service"]:<45} {fmt_usd(r["cost"]):>10} {fmt_usd(r["credits"]):>10} {fmt_usd(net):>10}')
+
+ elif group_by == 'sku':
+ month_filter = args.month
+ if not month_filter:
+ # Use most recent month
+ row = db.execute('SELECT MAX(substr(date, 1, 7)) as m FROM gcp_billing').fetchone()
+ month_filter = row['m'] if row else None
+
+ if not month_filter:
+ print('No data.')
+ return
+
+ rows = db.execute('''
+ SELECT service, sku, SUM(cost) as cost, SUM(credits) as credits,
+ SUM(usage_amount) as usage_amount, MAX(usage_unit) as usage_unit
+ FROM gcp_billing WHERE substr(date, 1, 7) = ?
+ GROUP BY service, sku ORDER BY cost DESC
+ ''', (month_filter,)).fetchall()
+
+ total = sum(r['cost'] + r['credits'] for r in rows)
+ print(f'\n {month_filter} (net: {fmt_usd(total)})')
+ print(f' {"Service":<30} {"SKU":<40} {"Net":>10} {"Usage":>15}')
+ print(' ' + '-' * 97)
+ for r in rows[:40]:
+ net = r['cost'] + r['credits']
+ if abs(net) >= 0.01:
+ usage = f'{r["usage_amount"]:.1f} {r["usage_unit"]}' if r['usage_amount'] else ''
+ print(f' {r["service"][:29]:<30} {r["sku"][:39]:<40} {fmt_usd(net):>10} {usage:>15}')
+
+ elif group_by == 'project':
+ rows = db.execute('''
+ SELECT substr(date, 1, 7) as month, project_id,
+ SUM(cost) as cost, SUM(credits) as credits
+ FROM gcp_billing GROUP BY month, project_id ORDER BY month, cost DESC
+ ''').fetchall()
+
+ current_month = None
+ for r in rows:
+ if r['month'] != current_month:
+ current_month = r['month']
+ month_total = sum(row['cost'] + row['credits'] for row in rows if row['month'] == current_month)
+ print(f'\n {current_month} (net: {fmt_usd(month_total)})')
+ print(f' {"Project":<45} {"Net":>12}')
+ print(' ' + '-' * 59)
+ net = r['cost'] + r['credits']
+ if abs(net) >= 0.01:
+ print(f' {r["project_id"]:<45} {fmt_usd(net):>12}')
+
+ else:
+ # Default: just monthly totals
+ rows = db.execute('''
+ SELECT substr(date, 1, 7) as month,
+ SUM(cost) as cost, SUM(credits) as credits,
+ COUNT(DISTINCT date) as days
+ FROM gcp_billing GROUP BY month ORDER BY month
+ ''').fetchall()
+
+ print(f'\n {"Month":<10} {"Gross":>12} {"Credits":>12} {"Net":>12} {"Days":>6} {"Daily Avg":>12}')
+ print(' ' + '-' * 68)
+ grand_total = 0
+ for r in rows:
+ net = r['cost'] + r['credits']
+ daily = net / max(r['days'], 1)
+ grand_total += net
+ print(f' {r["month"]:<10} {fmt_usd(r["cost"]):>12} {fmt_usd(r["credits"]):>12} {fmt_usd(net):>12} {r["days"]:>6} {fmt_usd(daily):>12}')
+ print(' ' + '-' * 68)
+ print(f' {"TOTAL":<10} {"":>12} {"":>12} {fmt_usd(grand_total):>12}')
+
+
+def cmd_daily(args):
+ """Show daily costs for a month."""
+ db = get_db()
+ month = args.month
+ if not month:
+ row = db.execute('SELECT MAX(substr(date, 1, 7)) as m FROM gcp_billing').fetchone()
+ month = row['m'] if row else None
+
+ if not month:
+ print('No data.')
+ return
+
+ rows = db.execute('''
+ SELECT date, SUM(cost) as cost, SUM(credits) as credits
+ FROM gcp_billing WHERE substr(date, 1, 7) = ?
+ GROUP BY date ORDER BY date
+ ''', (month,)).fetchall()
+
+ total = 0
+ print(f'\n {"Date":<12} {"Gross":>10} {"Credits":>10} {"Net":>10}')
+ print(' ' + '-' * 44)
+ for r in rows:
+ net = r['cost'] + r['credits']
+ total += net
+ print(f' {r["date"]:<12} {fmt_usd(r["cost"]):>10} {fmt_usd(r["credits"]):>10} {fmt_usd(net):>10}')
+ print(' ' + '-' * 44)
+ print(f' {"TOTAL":<12} {"":>10} {"":>10} {fmt_usd(total):>10}')
+
+
+def cmd_top(args):
+ """Show top cost items for a month."""
+ db = get_db()
+ month = args.month
+ if not month:
+ row = db.execute('SELECT MAX(substr(date, 1, 7)) as m FROM gcp_billing').fetchone()
+ month = row['m'] if row else None
+
+ if not month:
+ print('No data.')
+ return
+
+ # Top services
+ services = db.execute('''
+ SELECT service, SUM(cost + credits) as net, SUM(cost) as gross
+ FROM gcp_billing WHERE substr(date, 1, 7) = ?
+ GROUP BY service ORDER BY net DESC LIMIT 15
+ ''', (month,)).fetchall()
+
+ total = sum(r['net'] for r in services)
+ print(f'\n Top services for {month} (total: {fmt_usd(total)})')
+ print(f' {"Service":<45} {"Net":>12} {"% of Total":>10}')
+ print(' ' + '-' * 69)
+ for r in services:
+ pct = 100 * r['net'] / max(total, 0.01)
+ if abs(r['net']) >= 0.01:
+ print(f' {r["service"]:<45} {fmt_usd(r["net"]):>12} {pct:>9.1f}%')
+
+ # Top SKUs
+ skus = db.execute('''
+ SELECT service, sku, SUM(cost + credits) as net
+ FROM gcp_billing WHERE substr(date, 1, 7) = ?
+ GROUP BY service, sku ORDER BY net DESC LIMIT 20
+ ''', (month,)).fetchall()
+
+ print(f'\n Top SKUs for {month}')
+ print(f' {"Service":<25} {"SKU":<40} {"Net":>12}')
+ print(' ' + '-' * 79)
+ for r in skus:
+ if abs(r['net']) >= 0.01:
+ print(f' {r["service"][:24]:<25} {r["sku"][:39]:<40} {fmt_usd(r["net"]):>12}')
+
+
+def cmd_compare(args):
+ """Compare billing export data vs usage metering estimates."""
+ db = get_db()
+
+ # Get billing export monthly totals
+ billing_rows = db.execute('''
+ SELECT substr(date, 1, 7) as month, SUM(cost + credits) as net
+ FROM gcp_billing GROUP BY month ORDER BY month
+ ''').fetchall()
+
+ if not billing_rows:
+ print('No billing export data cached. Run "fetch" first.')
+ return
+
+ # Get usage metering estimates
+ try:
+ from billing import gcp as _gcp_billing
+ _gcp_billing._ensure_cached()
+ metering_data = _gcp_billing._cache.get('data', [])
+ except Exception as e:
+ print(f'Could not load usage metering data: {e}')
+ metering_data = []
+
+ metering_monthly = {}
+ for entry in metering_data:
+ month = entry['date'][:7]
+ day_total = sum(ns.get('total', 0) for ns in entry.get('namespaces', {}).values())
+ metering_monthly[month] = metering_monthly.get(month, 0) + day_total
+
+ print(f'\n {"Month":<10} {"Billing Export":>15} {"Usage Metering":>15} {"Ratio":>8}')
+ print(' ' + '-' * 50)
+ for r in billing_rows:
+ billing = r['net']
+ metering = metering_monthly.get(r['month'], 0)
+ ratio = f'{billing / metering:.2f}x' if metering > 0 else '--'
+ print(f' {r["month"]:<10} {fmt_usd(billing):>15} {fmt_usd(metering):>15} {ratio:>8}')
+
+
+def cmd_status(args):
+ """Show what data we have cached."""
+ db = get_db()
+ meta = {r['key']: r['value'] for r in db.execute('SELECT * FROM gcp_billing_meta').fetchall()}
+ billing_count = db.execute('SELECT COUNT(*) as c FROM gcp_billing').fetchone()['c']
+ billing_range = db.execute('SELECT MIN(date) as mn, MAX(date) as mx FROM gcp_billing').fetchone()
+
+ print(f'\n Billing export cache:')
+ print(f' DB path: {DB_PATH}')
+ print(f' Table: {meta.get("table", "(not set)")}')
+ print(f' Last fetch: {meta.get("last_fetch", "(never)")}')
+ print(f' Rows: {billing_count}')
+ if billing_count:
+ print(f' Date range: {billing_range["mn"]} to {billing_range["mx"]}')
+
+ # Also check billing export table status
+ try:
+ from google.cloud import bigquery
+ client = bigquery.Client(project=args.project)
+ table_id = 'testnet-440309.testnet440309billing.gcp_billing_export_v1_01EA8B_291C89_753ABC'
+ t = client.get_table(table_id)
+ print(f'\n BigQuery billing export:')
+ print(f' Table: {table_id}')
+ print(f' Rows: {t.num_rows}')
+ print(f' Modified: {t.modified}')
+ if t.num_rows > 0:
+ print(f' STATUS: Data available! Run "fetch --table {table_id}" to cache it.')
+ else:
+ print(f' STATUS: Not yet populated. GCP takes up to 24h after enabling export.')
+ except Exception as e:
+ print(f'\n BigQuery check failed: {e}')
+
+
+def cmd_metering(args):
+ """Query both usage metering tables and compare with different approaches."""
+ from google.cloud import bigquery
+ project = args.project
+ client = bigquery.Client(project=project)
+ months = args.months
+
+ end_date = datetime.now(timezone.utc).date()
+ start_date = end_date - timedelta(days=months * 31)
+
+ # Table names
+ usage_table = f'{project}.egress_consumption.gke_cluster_resource_usage'
+ consumption_table = f'{project}.egress_consumption.gke_cluster_resource_consumption'
+
+ print(f'Date range: {start_date} to {end_date}')
+
+ # 1. Current approach: usage table with our SKU pricing
+ print('\n=== Approach 1: gke_cluster_resource_usage (requests) with hardcoded SKU prices ===')
+ _query_metering_table(client, usage_table, start_date, end_date, 'REQUESTS')
+
+ # 2. Consumption table with our SKU pricing
+ print('\n=== Approach 2: gke_cluster_resource_consumption (actual) with hardcoded SKU prices ===')
+ _query_metering_table(client, consumption_table, start_date, end_date, 'CONSUMPTION')
+
+ # 3. Raw totals: what does each table report?
+ print('\n=== Approach 3: Raw resource totals from both tables ===')
+ for tname, label in [(usage_table, 'REQUESTS'), (consumption_table, 'CONSUMPTION')]:
+ query = f"""
+ SELECT
+ FORMAT_DATE('%Y-%m', DATE(start_time)) AS month,
+ resource_name,
+ SUM(usage.amount) AS total_amount,
+ usage.unit
+ FROM `{tname}`
+ WHERE DATE(start_time) BETWEEN @start AND @end
+ GROUP BY month, resource_name, usage.unit
+ ORDER BY month, resource_name
+ """
+ job_config = bigquery.QueryJobConfig(query_parameters=[
+ bigquery.ScalarQueryParameter('start', 'DATE', start_date.isoformat()),
+ bigquery.ScalarQueryParameter('end', 'DATE', end_date.isoformat()),
+ ])
+ rows = list(client.query(query, job_config=job_config).result())
+ print(f'\n {label} table raw resources:')
+ print(f' {"Month":<10} {"Resource":<20} {"Amount":>20} {"Unit":<15}')
+ print(' ' + '-' * 67)
+ for r in rows:
+ print(f' {r.month:<10} {r.resource_name:<20} {r.total_amount:>20,.0f} {r.unit:<15}')
+
+ # 4. Count distinct SKUs
+ print('\n=== Approach 4: Distinct SKUs in usage table ===')
+ query = f"""
+ SELECT sku_id, resource_name, COUNT(*) as row_count,
+ SUM(usage.amount) as total_amount, usage.unit
+ FROM `{usage_table}`
+ WHERE DATE(start_time) BETWEEN @start AND @end
+ GROUP BY sku_id, resource_name, usage.unit
+ ORDER BY total_amount DESC
+ """
+ job_config = bigquery.QueryJobConfig(query_parameters=[
+ bigquery.ScalarQueryParameter('start', 'DATE', start_date.isoformat()),
+ bigquery.ScalarQueryParameter('end', 'DATE', end_date.isoformat()),
+ ])
+ rows = list(client.query(query, job_config=job_config).result())
+ # Import pricing to check
+ from billing.gcp import _SKU_PRICING
+ print(f' {"SKU ID":<20} {"Resource":<20} {"Rows":>10} {"Amount":>18} {"Unit":<12} {"Known?"}')
+ print(' ' + '-' * 90)
+ for r in rows:
+ known = 'YES' if r.sku_id in _SKU_PRICING else 'MISSING'
+ print(f' {r.sku_id:<20} {r.resource_name:<20} {r.row_count:>10,} {r.total_amount:>18,.0f} {r.unit:<12} {known}')
+
+
+def _query_metering_table(client, table, start_date, end_date, label):
+ """Query a metering table and compute costs using our SKU pricing."""
+ from google.cloud import bigquery
+ from billing.gcp import _SKU_PRICING, _usage_to_cost
+
+ query = f"""
+ SELECT
+ FORMAT_DATE('%Y-%m', DATE(start_time)) AS month,
+ namespace,
+ sku_id,
+ resource_name,
+ SUM(usage.amount) AS total_usage
+ FROM `{table}`
+ WHERE DATE(start_time) BETWEEN @start AND @end
+ GROUP BY month, namespace, sku_id, resource_name
+ ORDER BY month, namespace
+ """
+ job_config = bigquery.QueryJobConfig(query_parameters=[
+ bigquery.ScalarQueryParameter('start', 'DATE', start_date.isoformat()),
+ bigquery.ScalarQueryParameter('end', 'DATE', end_date.isoformat()),
+ ])
+ rows = list(client.query(query, job_config=job_config).result())
+
+ monthly = {}
+ monthly_by_cat = {}
+ missing_skus = set()
+ for r in rows:
+ cost, category = _usage_to_cost(r.sku_id, r.resource_name, float(r.total_usage))
+ if r.sku_id not in _SKU_PRICING:
+ missing_skus.add(r.sku_id)
+ month = r.month
+ monthly[month] = monthly.get(month, 0) + cost
+ key = (month, category)
+ monthly_by_cat[key] = monthly_by_cat.get(key, 0) + cost
+
+ print(f' {"Month":<10} {"Total":>12} {"compute_spot":>14} {"compute_od":>14} {"network":>10} {"storage":>10}')
+ print(' ' + '-' * 74)
+ for month in sorted(monthly.keys()):
+ total = monthly[month]
+ spot = monthly_by_cat.get((month, 'compute_spot'), 0)
+ od = monthly_by_cat.get((month, 'compute_ondemand'), 0)
+ net = monthly_by_cat.get((month, 'network'), 0)
+ stor = monthly_by_cat.get((month, 'storage'), 0)
+ print(f' {month:<10} {fmt_usd(total):>12} {fmt_usd(spot):>14} {fmt_usd(od):>14} {fmt_usd(net):>10} {fmt_usd(stor):>10}')
+
+ if missing_skus:
+ print(f'\n WARNING: {len(missing_skus)} unknown SKU IDs (not priced): {", ".join(sorted(missing_skus)[:5])}...')
+
+
+# ---- Main ----
+
+def main():
+ parser = argparse.ArgumentParser(description='Explore GCP billing data')
+ parser.add_argument('--project', default='testnet-440309', help='GCP project ID')
+ parser.add_argument('--table', default='', help='BigQuery billing export table')
+ sub = parser.add_subparsers(dest='command')
+
+ sub.add_parser('discover', help='Find billing export tables')
+
+ fetch_p = sub.add_parser('fetch', help='Fetch billing data from BigQuery')
+ fetch_p.add_argument('--months', type=int, default=6, help='How many months back to fetch')
+
+ monthly_p = sub.add_parser('monthly', help='Monthly totals')
+ monthly_p.add_argument('--by', choices=['service', 'sku', 'project'], default='', help='Group by')
+ monthly_p.add_argument('--month', default='', help='Filter to month (YYYY-MM)')
+
+ daily_p = sub.add_parser('daily', help='Daily costs')
+ daily_p.add_argument('--month', default='', help='Month to show (YYYY-MM)')
+
+ top_p = sub.add_parser('top', help='Top cost items')
+ top_p.add_argument('--month', default='', help='Month to show (YYYY-MM)')
+
+ sub.add_parser('compare', help='Compare billing export vs usage metering')
+ sub.add_parser('status', help='Show data status (what we have cached)')
+
+ meter_p = sub.add_parser('metering', help='Query both metering tables directly and compare')
+ meter_p.add_argument('--months', type=int, default=6, help='How many months back')
+
+ args = parser.parse_args()
+
+ if not args.command:
+ parser.print_help()
+ sys.exit(1)
+
+ cmds = {
+ 'discover': cmd_discover,
+ 'fetch': cmd_fetch,
+ 'monthly': cmd_monthly,
+ 'daily': cmd_daily,
+ 'top': cmd_top,
+ 'compare': cmd_compare,
+ 'metering': cmd_metering,
+ 'status': cmd_status,
+ }
+ cmds[args.command](args)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/ci3/ci-metrics/billing/fetch_billing.py b/ci3/ci-metrics/billing/fetch_billing.py
new file mode 100644
index 000000000000..271a788fc6bd
--- /dev/null
+++ b/ci3/ci-metrics/billing/fetch_billing.py
@@ -0,0 +1,262 @@
+#!/usr/bin/env python3
+"""Fetch namespace billing data from GKE resource consumption metering in BigQuery.
+
+Queries the GKE cluster resource consumption table which records CPU and memory
+usage per namespace per pod. Actual GCP SKU prices (from the Cloud Billing
+Catalog API) are applied to convert resource usage into dollar costs.
+
+Categories produced:
+ - compute_spot (Spot / Preemptible VM cores + RAM)
+ - compute_ondemand (On-demand VM cores + RAM)
+
+Usage:
+    # Fetch last 30 days
+    python fetch_billing.py
+
+    # Specific range
+    python fetch_billing.py --from 2026-01-01 --to 2026-01-31
+
+    # Custom output directory
+    python fetch_billing.py --output-dir /tmp/billing
+
+Environment:
+ Requires Application Default Credentials or GOOGLE_APPLICATION_CREDENTIALS.
+ pip install google-cloud-bigquery
+"""
+import argparse
+import json
+import os
+import sys
+from datetime import datetime, timedelta
+
+from google.cloud import bigquery
+
+# ---- defaults ----
+DEFAULT_PROJECT = 'testnet-440309'
+DEFAULT_DATASET = 'egress_consumption'
+DEFAULT_TABLE_CONSUMPTION = 'gke_cluster_resource_consumption'
+DEFAULT_TABLE_USAGE = 'gke_cluster_resource_usage'
+DEFAULT_OUTPUT_DIR = os.path.join(
+ os.getenv('LOGS_DISK_PATH', '/logs-disk'), 'billing'
+)
+
+# ---- SKU pricing ----
+# Prices sourced from GCP Cloud Billing Catalog API for us-west1.
+SKU_PRICING = {
+ # Compute - Spot (per vCPU-hour / per GiB-hour)
+ 'E7FF-A0FB-FA82': {'price': 0.00497, 'resource': 'cpu', 'category': 'compute_spot'},
+ '48AB-89F5-9112': {'price': 0.000668, 'resource': 'memory', 'category': 'compute_spot'},
+ # Compute - On-demand T2D
+ 'EFE6-E23C-19CB': {'price': 0.027502, 'resource': 'cpu', 'category': 'compute_ondemand'},
+ 'FB05-036A-8982': {'price': 0.003686, 'resource': 'memory', 'category': 'compute_ondemand'},
+ # Compute - On-demand N2
+ 'BB77-5FDA-69D9': {'price': 0.031611, 'resource': 'cpu', 'category': 'compute_ondemand'},
+ '5B01-D157-A097': {'price': 0.004237, 'resource': 'memory', 'category': 'compute_ondemand'},
+ # Compute - On-demand N2D
+ 'A03E-E620-7389': {'price': 0.027502, 'resource': 'cpu', 'category': 'compute_ondemand'},
+ '5535-6D2D-4B50': {'price': 0.003686, 'resource': 'memory', 'category': 'compute_ondemand'},
+ # Network Egress (per GiB)
+ '0C3C-6B13-B1E8': {'price': 0.02, 'resource': 'networkEgress', 'category': 'network'},
+ '6B8F-E63D-832B': {'price': 0.0, 'resource': 'networkEgress', 'category': 'network'},
+ '92CB-C25F-B1D1': {'price': 0.0, 'resource': 'networkEgress', 'category': 'network'},
+ '984A-1F27-2D1F': {'price': 0.04, 'resource': 'networkEgress', 'category': 'network'},
+ '9DE9-9092-B3BC': {'price': 0.20, 'resource': 'networkEgress', 'category': 'network'},
+ 'C863-37DA-506E': {'price': 0.02, 'resource': 'networkEgress', 'category': 'network'},
+ 'C8EA-1A86-3D28': {'price': 0.02, 'resource': 'networkEgress', 'category': 'network'},
+ 'DE9E-AFBC-A15A': {'price': 0.01, 'resource': 'networkEgress', 'category': 'network'},
+ 'DFA5-B5C6-36D6': {'price': 0.085, 'resource': 'networkEgress', 'category': 'network'},
+ 'F274-1692-F213': {'price': 0.08, 'resource': 'networkEgress', 'category': 'network'},
+ 'FDBC-6E3B-D4D8': {'price': 0.15, 'resource': 'networkEgress', 'category': 'network'},
+ # Storage (per GiB-month)
+ 'D973-5D65-BAB2': {'price': 0.04, 'resource': 'storage', 'category': 'storage'},
+}
+
+
+def usage_to_cost(sku_id: str, resource_name: str, amount: float) -> tuple[float, str]:
+ """Convert raw usage amount to dollar cost. Returns (cost_usd, category)."""
+ info = SKU_PRICING.get(sku_id)
+ if not info:
+ return 0.0, 'other'
+
+ price = info['price']
+ if resource_name == 'cpu':
+ return (amount / 3600.0) * price, info['category']
+ elif resource_name == 'memory':
+ return (amount / 3600.0 / (1024 ** 3)) * price, info['category']
+ elif resource_name.startswith('networkEgress'):
+ return (amount / (1024 ** 3)) * price, info['category']
+ elif resource_name == 'storage':
+ gib_months = amount / (1024 ** 3) / (730 * 3600)
+ return gib_months * price, info['category']
+ return 0.0, info['category']
+
+
+# ---- BigQuery query ----
+
+def fetch_usage_rows(
+    client: bigquery.Client,
+    project: str,
+    dataset: str,
+    date_from: str,
+    date_to: str,
+) -> list[dict]:
+    """Query both metering tables for daily usage by namespace + SKU.
+
+    Args:
+        client: authenticated BigQuery client.
+        project: GCP project that owns the metering dataset.
+        dataset: BigQuery dataset containing both metering tables.
+        date_from: inclusive start date, YYYY-MM-DD.
+        date_to: inclusive end date, YYYY-MM-DD.
+
+    Returns:
+        One dict per (date, namespace, sku_id, resource_name) with usage
+        summed across both tables.
+    """
+    consumption = f'{project}.{dataset}.{DEFAULT_TABLE_CONSUMPTION}'
+    usage = f'{project}.{dataset}.{DEFAULT_TABLE_USAGE}'
+    # UNION ALL the two tables, then re-aggregate so a key that appears in
+    # both collapses to a single summed row. Only networkEgress/storage rows
+    # are taken from the usage table; compute comes from consumption.
+    query = f"""
+    SELECT date, namespace, sku_id, resource_name, SUM(total_usage) AS total_usage FROM (
+        SELECT DATE(start_time) AS date, namespace, sku_id, resource_name, SUM(usage.amount) AS total_usage
+        FROM `{consumption}`
+        WHERE DATE(start_time) BETWEEN @date_from AND @date_to
+        GROUP BY date, namespace, sku_id, resource_name
+        UNION ALL
+        SELECT DATE(start_time) AS date, namespace, sku_id, resource_name, SUM(usage.amount) AS total_usage
+        FROM `{usage}`
+        WHERE DATE(start_time) BETWEEN @date_from AND @date_to
+          AND resource_name IN ('networkEgress', 'storage')
+        GROUP BY date, namespace, sku_id, resource_name
+    )
+    GROUP BY date, namespace, sku_id, resource_name
+    ORDER BY date, namespace
+    """
+    # Dates go in as typed query parameters (DATE), not string interpolation.
+    job_config = bigquery.QueryJobConfig(
+        query_parameters=[
+            bigquery.ScalarQueryParameter('date_from', 'DATE', date_from),
+            bigquery.ScalarQueryParameter('date_to', 'DATE', date_to),
+        ]
+    )
+    rows = client.query(query, job_config=job_config).result()
+    return [dict(row) for row in rows]
+
+
+# ---- aggregate into daily JSON ----
+
+def build_daily_files(rows: list[dict]) -> tuple[dict[str, dict], set[str]]:
+ """Convert raw usage rows into daily billing JSON structures.
+
+ Returns (days_dict, unknown_skus).
+ """
+ days: dict[str, dict] = {}
+ unknown_skus: set[str] = set()
+
+ for row in rows:
+ date_str = (
+ row['date'].isoformat()
+ if hasattr(row['date'], 'isoformat')
+ else str(row['date'])
+ )
+ ns = row['namespace']
+ sku_id = row['sku_id']
+ resource_name = row['resource_name']
+ amount = float(row['total_usage'])
+
+ cost, category = usage_to_cost(sku_id, resource_name, amount)
+
+ if sku_id not in SKU_PRICING:
+ unknown_skus.add(sku_id)
+
+ if cost <= 0:
+ continue
+
+ if date_str not in days:
+ days[date_str] = {'date': date_str, 'namespaces': {}}
+ if ns not in days[date_str]['namespaces']:
+ days[date_str]['namespaces'][ns] = {'total': 0, 'breakdown': {}}
+
+ entry = days[date_str]['namespaces'][ns]
+ entry['breakdown'][category] = (
+ entry['breakdown'].get(category, 0) + cost
+ )
+ entry['total'] += cost
+
+ # Round
+ for day in days.values():
+ for ns_data in day['namespaces'].values():
+ ns_data['total'] = round(ns_data['total'], 4)
+ ns_data['breakdown'] = {
+ k: round(v, 4) for k, v in ns_data['breakdown'].items()
+ }
+
+ return days, unknown_skus
+
+
+def write_files(days: dict[str, dict], output_dir: str) -> int:
+ os.makedirs(output_dir, exist_ok=True)
+ count = 0
+ for date_str, data in sorted(days.items()):
+ filepath = os.path.join(output_dir, f'{date_str}.json')
+ with open(filepath, 'w') as f:
+ json.dump(data, f, indent=2)
+ count += 1
+ return count
+
+
+# ---- CLI ----
+
+def main():
+ parser = argparse.ArgumentParser(
+ description='Fetch GKE namespace compute billing from resource consumption metering'
+ )
+ today = datetime.utcnow().strftime('%Y-%m-%d')
+ default_from = (datetime.utcnow() - timedelta(days=30)).strftime('%Y-%m-%d')
+
+ parser.add_argument('--from', dest='date_from', default=default_from,
+ help='Start date YYYY-MM-DD (default: 30 days ago)')
+ parser.add_argument('--to', dest='date_to', default=today,
+ help='End date YYYY-MM-DD (default: today)')
+ parser.add_argument('--project', default=DEFAULT_PROJECT,
+ help=f'GCP project ID (default: {DEFAULT_PROJECT})')
+ parser.add_argument('--dataset', default=DEFAULT_DATASET,
+ help=f'BigQuery dataset (default: {DEFAULT_DATASET})')
+ parser.add_argument('--output-dir', default=DEFAULT_OUTPUT_DIR,
+ help=f'Output directory (default: {DEFAULT_OUTPUT_DIR})')
+ args = parser.parse_args()
+
+ print(f'Connecting to BigQuery ({args.project})...')
+ client = bigquery.Client(project=args.project)
+
+ print(f'Fetching metering data {args.date_from} to {args.date_to}...')
+ print(f' consumption: {args.project}.{args.dataset}.{DEFAULT_TABLE_CONSUMPTION}')
+ print(f' usage: {args.project}.{args.dataset}.{DEFAULT_TABLE_USAGE}')
+ rows = fetch_usage_rows(
+ client, args.project, args.dataset,
+ args.date_from, args.date_to,
+ )
+ print(f'Got {len(rows)} aggregated rows')
+
+ if not rows:
+ print('No metering data found. Check that:')
+ print(' 1. GKE resource consumption metering is enabled')
+ print(' 2. The date range has data')
+ return
+
+ days, unknown_skus = build_daily_files(rows)
+ count = write_files(days, args.output_dir)
+ print(f'Wrote {count} daily billing files to {args.output_dir}')
+
+ if unknown_skus:
+ print(f'\nWARNING: {len(unknown_skus)} unknown SKU(s) had zero cost assigned:')
+ for s in sorted(unknown_skus):
+ print(f' {s}')
+ print('Add these to SKU_PRICING in fetch-billing.py with prices from')
+ print('the GCP Cloud Billing Catalog API.')
+
+ # Summary
+ total = sum(
+ ns['total'] for day in days.values()
+ for ns in day['namespaces'].values()
+ )
+ ns_set: set[str] = set()
+ cat_set: set[str] = set()
+ for day in days.values():
+ for ns_name, ns_data in day['namespaces'].items():
+ ns_set.add(ns_name)
+ cat_set.update(ns_data['breakdown'].keys())
+
+ print(f'\nTotal cost: ${total:,.2f}')
+ print(f'Namespaces ({len(ns_set)}): {sorted(ns_set)}')
+ print(f'Categories: {sorted(cat_set)}')
+
+
+if __name__ == '__main__':
+ main()
diff --git a/ci3/ci-metrics/billing/gcp.py b/ci3/ci-metrics/billing/gcp.py
new file mode 100644
index 000000000000..5254e20bbbf0
--- /dev/null
+++ b/ci3/ci-metrics/billing/gcp.py
@@ -0,0 +1,289 @@
+"""Namespace billing helpers for rkapp.
+
+Fetches GKE namespace billing from BigQuery with in-memory cache.
+Route definitions remain in rk.py; this module provides the logic.
+
+SKU pricing: Queries the Cloud Billing pricing export table in BigQuery
+if available, otherwise falls back to hardcoded rates. To enable the
+pricing export:
+ 1. Go to GCP Console > Billing > Billing export
+ 2. Enable "Detailed usage cost" and "Pricing" exports
+ 3. Set the dataset to the _BQ_DATASET below
+"""
+import threading
+import time
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+
+# BigQuery defaults
+_BQ_PROJECT = 'testnet-440309'
+_BQ_DATASET = 'egress_consumption'
+_BQ_TABLE_USAGE = 'gke_cluster_resource_usage'
+_BQ_TABLE_PRICING = 'cloud_pricing_export'
+
+# Hardcoded fallback SKU pricing (us-west1).
+# cpu: price per vCPU-hour, memory: price per GiB-hour
+# network: price per GiB, storage: price per GiB-month
+_HARDCODED_SKU_PRICING = {
+ # Compute - Spot
+ 'E7FF-A0FB-FA82': {'price': 0.00497, 'resource': 'cpu', 'category': 'compute_spot'},
+ '48AB-89F5-9112': {'price': 0.000668, 'resource': 'memory', 'category': 'compute_spot'},
+ # Compute - On-demand T2D
+ 'EFE6-E23C-19CB': {'price': 0.027502, 'resource': 'cpu', 'category': 'compute_ondemand'},
+ 'FB05-036A-8982': {'price': 0.003686, 'resource': 'memory', 'category': 'compute_ondemand'},
+ # Compute - On-demand N2
+ 'BB77-5FDA-69D9': {'price': 0.031611, 'resource': 'cpu', 'category': 'compute_ondemand'},
+ '5B01-D157-A097': {'price': 0.004237, 'resource': 'memory', 'category': 'compute_ondemand'},
+ # Compute - On-demand N2D
+ 'A03E-E620-7389': {'price': 0.027502, 'resource': 'cpu', 'category': 'compute_ondemand'},
+ '5535-6D2D-4B50': {'price': 0.003686, 'resource': 'memory', 'category': 'compute_ondemand'},
+ # Network Egress (price per GiB)
+ '0C3C-6B13-B1E8': {'price': 0.02, 'resource': 'networkEgress', 'category': 'network'},
+ '6B8F-E63D-832B': {'price': 0.0, 'resource': 'networkEgress', 'category': 'network'},
+ '92CB-C25F-B1D1': {'price': 0.0, 'resource': 'networkEgress', 'category': 'network'},
+ '984A-1F27-2D1F': {'price': 0.04, 'resource': 'networkEgress', 'category': 'network'},
+ '9DE9-9092-B3BC': {'price': 0.20, 'resource': 'networkEgress', 'category': 'network'},
+ 'C863-37DA-506E': {'price': 0.02, 'resource': 'networkEgress', 'category': 'network'},
+ 'C8EA-1A86-3D28': {'price': 0.02, 'resource': 'networkEgress', 'category': 'network'},
+ 'DE9E-AFBC-A15A': {'price': 0.01, 'resource': 'networkEgress', 'category': 'network'},
+ 'DFA5-B5C6-36D6': {'price': 0.085, 'resource': 'networkEgress', 'category': 'network'},
+ 'F274-1692-F213': {'price': 0.08, 'resource': 'networkEgress', 'category': 'network'},
+ 'FDBC-6E3B-D4D8': {'price': 0.15, 'resource': 'networkEgress', 'category': 'network'},
+ # Storage (price per GiB-month)
+ 'D973-5D65-BAB2': {'price': 0.04, 'resource': 'storage', 'category': 'storage'},
+}
+
+# Resource name to category mapping for SKUs discovered from BigQuery
+_RESOURCE_CATEGORIES = {
+ ('cpu', True): 'compute_spot',
+ ('cpu', False): 'compute_ondemand',
+ ('memory', True): 'compute_spot',
+ ('memory', False): 'compute_ondemand',
+}
+
+# Active SKU pricing — updated from BigQuery if available
+_SKU_PRICING = dict(_HARDCODED_SKU_PRICING)
+
+# In-memory caches
+_cache = {'data': [], 'ts': 0}
+_cache_lock = threading.Lock()
+_CACHE_TTL = 6 * 3600 # 6 hours
+
+_pricing_cache = {'ts': 0}
+_pricing_lock = threading.Lock()
+_PRICING_CACHE_TTL = 24 * 3600 # 24 hours
+
+
+def _refresh_sku_pricing():
+    """Try to fetch SKU pricing from BigQuery pricing export table.
+
+    Refreshes the module-level _SKU_PRICING at most once per
+    _PRICING_CACHE_TTL. Non-blocking: if another thread is already
+    refreshing, returns immediately. Any failure (e.g. the export table
+    does not exist yet) leaves the hardcoded rates in place.
+    """
+    global _SKU_PRICING
+    now = time.time()
+    # Fast path: pricing cache still fresh, nothing to do.
+    if _pricing_cache['ts'] and now - _pricing_cache['ts'] < _PRICING_CACHE_TTL:
+        return
+    if not _pricing_lock.acquire(blocking=False):
+        return
+    try:
+        # Double-check after taking the lock: another thread may have
+        # completed a refresh between our TTL check and the acquire.
+        if _pricing_cache['ts'] and time.time() - _pricing_cache['ts'] < _PRICING_CACHE_TTL:
+            return
+        from google.cloud import bigquery
+        client = bigquery.Client(project=_BQ_PROJECT)
+        table = f'{_BQ_PROJECT}.{_BQ_DATASET}.{_BQ_TABLE_PRICING}'
+
+        # Get the known SKU IDs we need pricing for. Inlining them into the
+        # query is safe here: they come from our own hardcoded dict.
+        sku_ids = list(_HARDCODED_SKU_PRICING.keys())
+        placeholders = ', '.join(f"'{s}'" for s in sku_ids)
+
+        # QUALIFY keeps only the most recent export row per SKU.
+        query = f"""
+            SELECT sku.id AS sku_id,
+                   pricing.effective_price AS price,
+                   sku.description AS description
+            FROM `{table}`
+            WHERE sku.id IN ({placeholders})
+              AND service.description = 'Compute Engine'
+            QUALIFY ROW_NUMBER() OVER (PARTITION BY sku.id ORDER BY export_time DESC) = 1
+        """
+        rows = list(client.query(query).result())
+        if rows:
+            # Build a complete replacement dict, then swap the module-level
+            # reference in one assignment (readers never see a partial update).
+            updated = dict(_HARDCODED_SKU_PRICING)
+            for row in rows:
+                sid = row.sku_id
+                if sid in updated:
+                    updated[sid] = {**updated[sid], 'price': float(row.price)}
+            _SKU_PRICING = updated
+            _pricing_cache['ts'] = time.time()
+            print(f"[rk_billing] Updated {len(rows)} SKU prices from BigQuery")
+        else:
+            _pricing_cache['ts'] = time.time()
+            print("[rk_billing] No pricing rows returned, using hardcoded rates")
+    except Exception as e:
+        # Table probably doesn't exist yet — use hardcoded rates.
+        # Timestamp is still set so we don't hammer BigQuery on every call.
+        _pricing_cache['ts'] = time.time()
+        print(f"[rk_billing] SKU pricing query failed (using hardcoded): {e}")
+    finally:
+        _pricing_lock.release()
+
+
+# ---- BigQuery fetch ----
+
+def _usage_to_cost(sku_id, resource_name, amount):
+ info = _SKU_PRICING.get(sku_id)
+ if not info:
+ return 0.0, 'other'
+ price = info['price']
+ if resource_name == 'cpu':
+ # cpu-seconds -> hours
+ return (amount / 3600.0) * price, info['category']
+ elif resource_name == 'memory':
+ # byte-seconds -> GiB-hours
+ return (amount / 3600.0 / (1024 ** 3)) * price, info['category']
+ elif resource_name.startswith('networkEgress'):
+ # bytes -> GiB
+ return (amount / (1024 ** 3)) * price, info['category']
+ elif resource_name == 'storage':
+ # byte-seconds -> GiB-months (730 hours/month)
+ gib_months = amount / (1024 ** 3) / (730 * 3600)
+ return gib_months * price, info['category']
+ return 0.0, info['category']
+
+
+def _fetch_from_bigquery(date_from_str, date_to_str):
+    """Query BigQuery for usage data, return list of daily billing entries.
+
+    Args:
+        date_from_str: inclusive start date, YYYY-MM-DD.
+        date_to_str: inclusive end date, YYYY-MM-DD.
+
+    Returns:
+        List of {'date', 'namespaces': {ns: {'total', 'breakdown'}}} dicts
+        sorted by date; empty list on import or query failure.
+    """
+    try:
+        from google.cloud import bigquery
+    except ImportError:
+        print("[rk_billing] google-cloud-bigquery not installed")
+        return []
+
+    try:
+        client = bigquery.Client(project=_BQ_PROJECT)
+        # Use the usage table for all resources (actual consumption, not just requests).
+        # The consumption table only records resource *requests* which can be far lower
+        # than actual usage (e.g. prove-n-tps-real: $2.87 requests vs $138.72 actual).
+        usage = f'{_BQ_PROJECT}.{_BQ_DATASET}.{_BQ_TABLE_USAGE}'
+        query = f"""
+            SELECT DATE(start_time) AS date, namespace, sku_id, resource_name,
+                   SUM(usage.amount) AS total_usage
+            FROM `{usage}`
+            WHERE DATE(start_time) BETWEEN @date_from AND @date_to
+            GROUP BY date, namespace, sku_id, resource_name
+            ORDER BY date, namespace
+        """
+        # Dates passed as typed DATE query parameters, not string interpolation.
+        job_config = bigquery.QueryJobConfig(
+            query_parameters=[
+                bigquery.ScalarQueryParameter('date_from', 'DATE', date_from_str),
+                bigquery.ScalarQueryParameter('date_to', 'DATE', date_to_str),
+            ]
+        )
+        rows = list(client.query(query, job_config=job_config).result())
+    except Exception as e:
+        print(f"[rk_billing] BigQuery fetch failed: {e}")
+        return []
+
+    # Build daily structures: {date: {'date', 'namespaces': {...}}}
+    days = {}
+    for row in rows:
+        # DATE columns normally come back as date objects; be defensive anyway.
+        date_str = row.date.isoformat() if hasattr(row.date, 'isoformat') else str(row.date)
+        ns = row.namespace
+        cost, category = _usage_to_cost(row.sku_id, row.resource_name, float(row.total_usage))
+        # Skip unpriced or zero-cost rows entirely.
+        if cost <= 0:
+            continue
+        if date_str not in days:
+            days[date_str] = {'date': date_str, 'namespaces': {}}
+        if ns not in days[date_str]['namespaces']:
+            days[date_str]['namespaces'][ns] = {'total': 0, 'breakdown': {}}
+        entry = days[date_str]['namespaces'][ns]
+        entry['breakdown'][category] = entry['breakdown'].get(category, 0) + cost
+        entry['total'] += cost
+
+    # Round values for stable output
+    for data in days.values():
+        for ns_data in data['namespaces'].values():
+            ns_data['total'] = round(ns_data['total'], 4)
+            ns_data['breakdown'] = {k: round(v, 4) for k, v in ns_data['breakdown'].items()}
+
+    return sorted(days.values(), key=lambda x: x['date'])
+
+
+def _ensure_cached():
+ now = time.time()
+ if _cache['data'] and now - _cache['ts'] < _CACHE_TTL:
+ return
+ if not _cache_lock.acquire(blocking=False):
+ return
+ try:
+ yesterday = datetime.now(timezone.utc).date() - timedelta(days=1)
+ date_from = (yesterday - timedelta(days=365)).isoformat()
+ date_to = yesterday.isoformat()
+ print(f"[rk_billing] Fetching billing data from BigQuery ({date_from} to {date_to})...")
+ data = _fetch_from_bigquery(date_from, date_to)
+ if data:
+ _cache['data'] = data
+ _cache['ts'] = now
+ print(f"[rk_billing] Cached {len(data)} days of billing data")
+ finally:
+ _cache_lock.release()
+
+
+# ---- Public API ----
+
+def get_billing_files_in_range(date_from, date_to):
+ """Return billing data for dates in range. Fetches from BigQuery with in-memory cache."""
+ # Refresh SKU pricing from BigQuery (async, falls back to hardcoded)
+ threading.Thread(target=_refresh_sku_pricing, daemon=True).start()
+
+ if not _cache['data']:
+ _ensure_cached() # block on first load so dashboard isn't empty
+ else:
+ threading.Thread(target=_ensure_cached, daemon=True).start()
+
+ # Convert datetime args to date strings for filtering
+ from_str = date_from.strftime('%Y-%m-%d') if hasattr(date_from, 'strftime') else str(date_from)
+ to_str = date_to.strftime('%Y-%m-%d') if hasattr(date_to, 'strftime') else str(date_to)
+
+ return [e for e in _cache['data'] if from_str <= e['date'] <= to_str]
+
+
+def _merge_ns_billing(target, ns_data):
+ target['total'] += ns_data.get('total', 0)
+ for cat, val in ns_data.get('breakdown', {}).items():
+ target['breakdown'][cat] = target['breakdown'].get(cat, 0) + val
+
+
+def aggregate_billing_weekly(daily_data):
+ if not daily_data:
+ return []
+ weeks = {}
+ for entry in daily_data:
+ d = datetime.strptime(entry['date'], '%Y-%m-%d')
+ week_start = d - timedelta(days=d.weekday())
+ week_key = week_start.strftime('%Y-%m-%d')
+ if week_key not in weeks:
+ weeks[week_key] = {'date': week_key, 'namespaces': {}}
+ for ns, ns_data in entry.get('namespaces', {}).items():
+ if ns not in weeks[week_key]['namespaces']:
+ weeks[week_key]['namespaces'][ns] = {'total': 0, 'breakdown': {}}
+ _merge_ns_billing(weeks[week_key]['namespaces'][ns], ns_data)
+ return sorted(weeks.values(), key=lambda x: x['date'])
+
+
+def aggregate_billing_monthly(daily_data):
+ if not daily_data:
+ return []
+ months = {}
+ for entry in daily_data:
+ month_key = entry['date'][:7] + '-01'
+ if month_key not in months:
+ months[month_key] = {'date': month_key, 'namespaces': {}}
+ for ns, ns_data in entry.get('namespaces', {}).items():
+ if ns not in months[month_key]['namespaces']:
+ months[month_key]['namespaces'][ns] = {'total': 0, 'breakdown': {}}
+ _merge_ns_billing(months[month_key]['namespaces'][ns], ns_data)
+ return sorted(months.values(), key=lambda x: x['date'])
+
+
+def serve_billing_dashboard():
+ billing_html_path = Path(__file__).parent / 'billing-dashboard.html'
+ if billing_html_path.exists():
+ with billing_html_path.open('r') as f:
+ return f.read()
+ return None
diff --git a/ci3/ci-metrics/ci-run-seed.json.gz b/ci3/ci-metrics/ci-run-seed.json.gz
new file mode 100644
index 000000000000..a971ad10d38b
Binary files /dev/null and b/ci3/ci-metrics/ci-run-seed.json.gz differ
diff --git a/ci3/ci-metrics/db.py b/ci3/ci-metrics/db.py
new file mode 100644
index 000000000000..93e970fe3a56
--- /dev/null
+++ b/ci3/ci-metrics/db.py
@@ -0,0 +1,107 @@
+"""SQLite database for CI metrics storage.
+
+Stores test events (from Redis pub/sub) and merge queue daily stats
+(backfilled from GitHub API).
+"""
+import os
+import sqlite3
+import threading
+
+_DB_PATH = os.path.join(os.getenv('LOGS_DISK_PATH', '/logs-disk'), 'metrics.db')
+_local = threading.local()
+
+SCHEMA = """
+PRAGMA journal_mode=WAL;
+
+CREATE TABLE IF NOT EXISTS test_events (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ status TEXT NOT NULL,
+ test_cmd TEXT NOT NULL,
+ log_url TEXT,
+ ref_name TEXT NOT NULL,
+ commit_hash TEXT,
+ commit_author TEXT,
+ commit_msg TEXT,
+ exit_code INTEGER,
+ duration_secs REAL,
+ is_scenario INTEGER DEFAULT 0,
+ owners TEXT,
+ flake_group_id TEXT,
+ dashboard TEXT NOT NULL DEFAULT '',
+ timestamp TEXT NOT NULL
+);
+CREATE INDEX IF NOT EXISTS idx_test_events_status ON test_events(status);
+CREATE INDEX IF NOT EXISTS idx_test_events_ts ON test_events(timestamp);
+CREATE INDEX IF NOT EXISTS idx_test_events_cmd ON test_events(test_cmd);
+CREATE INDEX IF NOT EXISTS idx_test_events_dashboard ON test_events(dashboard);
+
+CREATE TABLE IF NOT EXISTS merge_queue_daily (
+ date TEXT PRIMARY KEY,
+ total INTEGER NOT NULL DEFAULT 0,
+ success INTEGER NOT NULL DEFAULT 0,
+ failure INTEGER NOT NULL DEFAULT 0,
+ cancelled INTEGER NOT NULL DEFAULT 0,
+ in_progress INTEGER NOT NULL DEFAULT 0
+);
+
+CREATE TABLE IF NOT EXISTS ci_runs (
+ dashboard TEXT NOT NULL,
+ name TEXT NOT NULL DEFAULT '',
+ timestamp_ms INTEGER NOT NULL,
+ complete_ms INTEGER,
+ status TEXT,
+ author TEXT,
+ pr_number INTEGER,
+ instance_type TEXT,
+ instance_vcpus INTEGER,
+ spot INTEGER DEFAULT 0,
+ cost_usd REAL,
+ job_id TEXT DEFAULT '',
+ arch TEXT DEFAULT '',
+ synced_at TEXT NOT NULL,
+ PRIMARY KEY (dashboard, timestamp_ms, name)
+);
+CREATE INDEX IF NOT EXISTS idx_ci_runs_ts ON ci_runs(timestamp_ms);
+CREATE INDEX IF NOT EXISTS idx_ci_runs_name ON ci_runs(name);
+CREATE INDEX IF NOT EXISTS idx_ci_runs_dashboard ON ci_runs(dashboard);
+"""
+
+
+_MIGRATIONS = [
+ # Add columns introduced after initial schema
+ "ALTER TABLE ci_runs ADD COLUMN instance_vcpus INTEGER",
+ "ALTER TABLE ci_runs ADD COLUMN job_id TEXT DEFAULT ''",
+ "ALTER TABLE ci_runs ADD COLUMN arch TEXT DEFAULT ''",
+ "CREATE INDEX IF NOT EXISTS idx_ci_runs_dashboard ON ci_runs(dashboard)",
+]
+
+
+def get_db() -> sqlite3.Connection:
+    """Return this thread's SQLite connection, creating and migrating on first use.
+
+    Connections live in thread-local storage (sqlite3 connections are not
+    shareable across threads by default). On first use in a thread the
+    schema is created idempotently and migrations are applied.
+    """
+    conn = getattr(_local, 'conn', None)
+    if conn is None:
+        os.makedirs(os.path.dirname(_DB_PATH), exist_ok=True)
+        conn = sqlite3.connect(_DB_PATH)
+        # Wait up to 5s on a locked database instead of failing immediately.
+        conn.execute('PRAGMA busy_timeout = 5000')
+        conn.row_factory = sqlite3.Row
+        conn.executescript(SCHEMA)
+        # Run migrations (ignore "duplicate column" errors for idempotency)
+        for sql in _MIGRATIONS:
+            try:
+                conn.execute(sql)
+            except sqlite3.OperationalError:
+                pass
+        conn.commit()
+        _local.conn = conn
+    return conn
+
+
+def query(sql: str, params=()) -> list[dict]:
+ conn = get_db()
+ rows = conn.execute(sql, params).fetchall()
+ return [dict(r) for r in rows]
+
+
+def execute(sql: str, params=()):
+ conn = get_db()
+ conn.execute(sql, params)
+ conn.commit()
diff --git a/ci3/ci-metrics/ec2_pricing.py b/ci3/ci-metrics/ec2_pricing.py
new file mode 100644
index 000000000000..ace55ea4f40a
--- /dev/null
+++ b/ci3/ci-metrics/ec2_pricing.py
@@ -0,0 +1,232 @@
+"""EC2 instance pricing: live on-demand + spot rates with TTL cache.
+
+Queries the AWS Pricing API (on-demand) and EC2 describe_spot_price_history
+(spot) for us-east-2 instance rates. Caches results for 24 hours and falls
+back to hardcoded values if the APIs are unavailable.
+
+Exports:
+ get_instance_rate(instance_type, is_spot) -> float
+ get_fallback_vcpu_rate(is_spot) -> float
+"""
+import json
+import threading
+import time
+from datetime import datetime, timezone
+
+# ---- Hardcoded fallback rates (us-east-2, USD/hr) ----
+
+_HARDCODED_RATES = {
+ ('m6a.48xlarge', True): 8.31, # spot
+ ('m6a.48xlarge', False): 16.56, # on-demand
+ ('m6a.32xlarge', True): 5.54,
+ ('m6a.32xlarge', False): 11.04,
+ ('m6a.16xlarge', True): 2.77,
+ ('m6a.16xlarge', False): 5.52,
+ ('m7a.48xlarge', True): 8.31,
+ ('m7a.48xlarge', False): 16.56,
+ ('m7a.16xlarge', True): 2.77,
+ ('m7a.16xlarge', False): 5.52,
+ ('m7i.48xlarge', True): 8.31,
+ ('m7i.48xlarge', False): 16.56,
+ ('r7g.16xlarge', True): 1.97,
+ ('r7g.16xlarge', False): 3.94,
+}
+_FALLBACK_VCPU_HOUR = {True: 0.0433, False: 0.0864}
+
+# ---- Cache state ----
+
+_REGION = 'us-east-2'
+_LOCATION = 'US East (Ohio)' # Pricing API uses location names, not codes
+_CACHE_TTL = 24 * 3600 # 24 hours
+
+_cache = {
+ 'ondemand': {}, # instance_type -> USD/hr
+ 'spot': {}, # instance_type -> USD/hr
+ 'ts': 0, # last successful fetch time
+}
+_cache_lock = threading.Lock()
+
+
+# ---- On-demand pricing (AWS Pricing API) ----
+
+def _fetch_ondemand_rate(pricing_client, instance_type: str) -> float | None:
+    """Fetch on-demand hourly rate for a single instance type from AWS Pricing API.
+
+    The Pricing API is only available in us-east-1 and ap-south-1.
+
+    Args:
+        pricing_client: boto3 'pricing' client.
+        instance_type: EC2 instance type, e.g. 'm6a.48xlarge'.
+
+    Returns:
+        Hourly USD rate, or None if no positive price was found or the call failed.
+    """
+    try:
+        response = pricing_client.get_products(
+            ServiceCode='AmazonEC2',
+            # Narrow to plain Linux, shared tenancy, in-use capacity so we get
+            # the standard on-demand SKU rather than reserved/dedicated ones.
+            Filters=[
+                {'Type': 'TERM_MATCH', 'Field': 'instanceType', 'Value': instance_type},
+                {'Type': 'TERM_MATCH', 'Field': 'location', 'Value': _LOCATION},
+                {'Type': 'TERM_MATCH', 'Field': 'operatingSystem', 'Value': 'Linux'},
+                {'Type': 'TERM_MATCH', 'Field': 'preInstalledSw', 'Value': 'NA'},
+                {'Type': 'TERM_MATCH', 'Field': 'tenancy', 'Value': 'Shared'},
+                {'Type': 'TERM_MATCH', 'Field': 'capacitystatus', 'Value': 'Used'},
+            ],
+            MaxResults=10,
+        )
+        # PriceList entries may arrive as JSON strings or parsed dicts; handle both.
+        for price_item in response.get('PriceList', []):
+            product = json.loads(price_item) if isinstance(price_item, str) else price_item
+            on_demand = product.get('terms', {}).get('OnDemand', {})
+            for term in on_demand.values():
+                for dim in term.get('priceDimensions', {}).values():
+                    price = dim.get('pricePerUnit', {}).get('USD')
+                    # First positive USD price wins.
+                    if price and float(price) > 0:
+                        return float(price)
+    except Exception as e:
+        print(f"[ec2_pricing] on-demand fetch error for {instance_type}: {e}")
+    return None
+
+
+def _fetch_all_ondemand(instance_types: list[str]) -> dict[str, float]:
+ """Fetch on-demand rates for all instance types. Returns {type: rate}."""
+ try:
+ import boto3
+ except ImportError:
+ print("[ec2_pricing] boto3 not installed, skipping on-demand fetch")
+ return {}
+
+ results = {}
+ try:
+ # Pricing API is only in us-east-1 and ap-south-1
+ pricing = boto3.client('pricing', region_name='us-east-1')
+ for itype in instance_types:
+ rate = _fetch_ondemand_rate(pricing, itype)
+ if rate is not None:
+ results[itype] = rate
+ except Exception as e:
+ print(f"[ec2_pricing] on-demand client error: {e}")
+ return results
+
+
+# ---- Spot pricing (EC2 describe_spot_price_history) ----
+
+def _fetch_all_spot(instance_types: list[str]) -> dict[str, float]:
+ """Fetch current spot prices for all instance types. Returns {type: rate}.
+
+ Uses describe_spot_price_history with StartTime=now to get the most recent
+ price. Takes the minimum across availability zones.
+ """
+ try:
+ import boto3
+ except ImportError:
+ print("[ec2_pricing] boto3 not installed, skipping spot fetch")
+ return {}
+
+ results = {}
+ try:
+ ec2 = boto3.client('ec2', region_name=_REGION)
+ for itype in instance_types:
+ try:
+ response = ec2.describe_spot_price_history(
+ InstanceTypes=[itype],
+ ProductDescriptions=['Linux/UNIX'],
+ StartTime=datetime.now(timezone.utc),
+ MaxResults=10,
+ )
+ prices = []
+ for entry in response.get('SpotPriceHistory', []):
+ try:
+ prices.append(float(entry['SpotPrice']))
+ except (KeyError, ValueError):
+ continue
+ if prices:
+ # Use the minimum AZ price (what our fleet would target)
+ results[itype] = min(prices)
+ except Exception as e:
+ print(f"[ec2_pricing] spot fetch error for {itype}: {e}")
+ except Exception as e:
+ print(f"[ec2_pricing] spot client error: {e}")
+ return results
+
+
+# ---- Cache refresh ----
+
+def _get_known_instance_types() -> list[str]:
+ """Return the set of instance types we need pricing for."""
+ return sorted({itype for itype, _ in _HARDCODED_RATES})
+
+
+def _refresh_cache():
+    """Fetch fresh pricing data and update the cache. Thread-safe.
+
+    No-op while the cache is within _CACHE_TTL, or when another thread is
+    already refreshing (non-blocking lock). Keeps the previous cache and
+    hardcoded fallbacks if the APIs return nothing.
+    """
+    now = time.time()
+    # Fast path: cache still fresh.
+    if _cache['ts'] and now - _cache['ts'] < _CACHE_TTL:
+        return
+    if not _cache_lock.acquire(blocking=False):
+        return  # another thread is already refreshing
+    try:
+        # Double-check after acquiring lock
+        if _cache['ts'] and time.time() - _cache['ts'] < _CACHE_TTL:
+            return
+
+        instance_types = _get_known_instance_types()
+        ondemand = _fetch_all_ondemand(instance_types)
+        spot = _fetch_all_spot(instance_types)
+
+        # Only update cache if we got at least some data
+        if ondemand or spot:
+            if ondemand:
+                _cache['ondemand'] = ondemand
+            if spot:
+                _cache['spot'] = spot
+            _cache['ts'] = time.time()
+            print(f"[ec2_pricing] Cache refreshed: {len(ondemand)} on-demand, {len(spot)} spot rates")
+        else:
+            print("[ec2_pricing] No pricing data returned, keeping existing cache/fallbacks")
+    except Exception as e:
+        print(f"[ec2_pricing] Cache refresh error: {e}")
+    finally:
+        _cache_lock.release()
+
+
+def _ensure_cached():
+ """Ensure cache is populated. Blocks on first call, async refresh after."""
+ if not _cache['ts']:
+ _refresh_cache() # block on first load
+ else:
+ threading.Thread(target=_refresh_cache, daemon=True).start()
+
+
+# ---- Public API ----
+
+def get_instance_rate(instance_type: str, is_spot: bool) -> float:
+ """Get the hourly rate for an EC2 instance type.
+
+ Tries live pricing cache first, falls back to hardcoded rates.
+
+ Args:
+ instance_type: EC2 instance type (e.g. 'm6a.48xlarge')
+ is_spot: True for spot pricing, False for on-demand
+
+ Returns:
+ Hourly rate in USD.
+ """
+ _ensure_cached()
+
+ # Try live cache
+ cache_key = 'spot' if is_spot else 'ondemand'
+ rate = _cache[cache_key].get(instance_type)
+ if rate is not None:
+ return rate
+
+ # Fall back to hardcoded
+ rate = _HARDCODED_RATES.get((instance_type, is_spot))
+ if rate is not None:
+ return rate
+
+ # Unknown instance type -- return 0 (caller should use vCPU fallback)
+ return 0.0
+
+
+def get_fallback_vcpu_rate(is_spot: bool) -> float:
+    """Get the per-vCPU hourly rate for unknown instance types.
+
+    Args:
+        is_spot: True for spot, False for on-demand
+
+    Returns:
+        Per-vCPU hourly rate in USD.
+    """
+    # _FALLBACK_VCPU_HOUR is keyed by the is_spot boolean (module constants).
+    return _FALLBACK_VCPU_HOUR[is_spot]
diff --git a/ci3/ci-metrics/github_data.py b/ci3/ci-metrics/github_data.py
new file mode 100644
index 000000000000..8824d187cb81
--- /dev/null
+++ b/ci3/ci-metrics/github_data.py
@@ -0,0 +1,666 @@
+"""GitHub API polling with in-memory cache.
+
+Fetches PR lifecycle, deployment runs, branch lag, and merge queue stats via `gh` CLI.
+Most data cached in memory with TTL. Merge queue stats persisted to SQLite daily.
+"""
+import json
+import subprocess
+import threading
+import time
+from datetime import datetime, timedelta, timezone
+
+REPO = 'AztecProtocol/aztec-packages'
+
+BRANCH_PAIRS = [
+ ('next', 'staging-public'),
+ ('next', 'testnet'),
+ ('staging-public', 'testnet'),
+]
+
+DEPLOY_WORKFLOWS = [
+ 'deploy-staging-networks.yml',
+ 'deploy-network.yml',
+ 'deploy-next-net.yml',
+]
+
+_CACHE_TTL = 3600 # 1 hour
+_pr_cache = {'data': [], 'ts': 0}
+_deploy_cache = {'data': [], 'ts': 0}
+_lag_cache = {'data': [], 'ts': 0}
+_pr_author_cache = {} # {pr_number: {'author': str, 'title': str, 'branch': str}}
+_pr_lock = threading.Lock()
+_deploy_lock = threading.Lock()
+_lag_lock = threading.Lock()
+
+
+def _gh(args: list[str]) -> str | None:
+ try:
+ result = subprocess.run(
+ ['gh'] + args,
+ capture_output=True, text=True, timeout=30
+ )
+ if result.returncode == 0:
+ return result.stdout.strip()
+ except (FileNotFoundError, subprocess.TimeoutExpired) as e:
+ print(f"[rk_github] gh error: {e}")
+ return None
+
+
+# ---- PR lifecycle ----
+
+def _fetch_and_process_prs() -> list[dict]:
+ out = _gh([
+ 'pr', 'list', '--repo', REPO, '--state', 'merged',
+ '--limit', '500',
+ '--json', 'number,author,title,createdAt,mergedAt,closedAt,baseRefName,'
+ 'headRefName,additions,deletions,changedFiles,isDraft,reviewDecision,labels'
+ ])
+ if not out:
+ return []
+ try:
+ prs = json.loads(out)
+ except json.JSONDecodeError:
+ return []
+
+ for pr in prs:
+ author = pr.get('author', {})
+ if isinstance(author, dict):
+ pr['author'] = author.get('login', 'unknown')
+ # Extract label names from label objects
+ labels = pr.get('labels', [])
+ if labels and isinstance(labels[0], dict):
+ pr['labels'] = [l.get('name', '') for l in labels]
+ created = pr.get('createdAt', '')
+ merged = pr.get('mergedAt')
+ if created and merged:
+ try:
+ c = datetime.fromisoformat(created.replace('Z', '+00:00'))
+ m = datetime.fromisoformat(merged.replace('Z', '+00:00'))
+ pr['merge_time_hrs'] = round((m - c).total_seconds() / 3600, 2)
+ except (ValueError, TypeError):
+ pr['merge_time_hrs'] = None
+ else:
+ pr['merge_time_hrs'] = None
+ pr['merged_date'] = merged[:10] if merged else None
+ pr['size'] = (pr.get('additions', 0) or 0) + (pr.get('deletions', 0) or 0)
+ return prs
+
+
+def _ensure_prs():
+    """Refresh the merged-PR cache when its TTL has expired.
+
+    Non-blocking on contention: if another thread already holds the lock,
+    return immediately and let the caller serve the current (possibly stale)
+    cache contents.
+    """
+    now = time.time()
+    if _pr_cache['data'] and now - _pr_cache['ts'] < _CACHE_TTL:
+        return
+    if not _pr_lock.acquire(blocking=False):
+        return
+    try:
+        prs = _fetch_and_process_prs()
+        # Only overwrite on a non-empty fetch so a transient gh failure
+        # keeps serving the last good data.
+        if prs:
+            _pr_cache['data'] = prs
+            _pr_cache['ts'] = now
+    finally:
+        _pr_lock.release()
+
+
+# ---- Deployments ----
+
+def _fetch_all_deploys() -> list[dict]:
+ all_runs = []
+ for workflow in DEPLOY_WORKFLOWS:
+ out = _gh([
+ 'run', 'list', '--repo', REPO,
+ '--workflow', workflow, '--limit', '50',
+ '--json', 'databaseId,status,conclusion,createdAt,updatedAt,headBranch,name'
+ ])
+ if not out:
+ continue
+ try:
+ runs = json.loads(out)
+ except json.JSONDecodeError:
+ continue
+ for run in runs:
+ started = run.get('createdAt', '')
+ completed = run.get('updatedAt')
+ duration = None
+ if started and completed:
+ try:
+ s = datetime.fromisoformat(started.replace('Z', '+00:00'))
+ c = datetime.fromisoformat(completed.replace('Z', '+00:00'))
+ duration = round((c - s).total_seconds(), 1)
+ except (ValueError, TypeError):
+ pass
+ all_runs.append({
+ 'run_id': str(run.get('databaseId', '')),
+ 'workflow_name': workflow.replace('.yml', ''),
+ 'ref_name': run.get('headBranch', ''),
+ 'status': run.get('conclusion', run.get('status', 'unknown')),
+ 'started_at': started,
+ 'completed_at': completed,
+ 'duration_secs': duration,
+ 'started_date': started[:10] if started else None,
+ })
+ return all_runs
+
+
+def _ensure_deploys():
+ now = time.time()
+ if _deploy_cache['data'] and now - _deploy_cache['ts'] < _CACHE_TTL:
+ return
+ if not _deploy_lock.acquire(blocking=False):
+ return
+ try:
+ deploys = _fetch_all_deploys()
+ if deploys:
+ _deploy_cache['data'] = deploys
+ _deploy_cache['ts'] = now
+ finally:
+ _deploy_lock.release()
+
+
+# ---- Branch lag ----
+
+def _fetch_branch_lag() -> list[dict]:
+ results = []
+ today = datetime.now(timezone.utc).date().isoformat()
+ for source, target in BRANCH_PAIRS:
+ out = _gh([
+ 'api', f'repos/{REPO}/compare/{target}...{source}',
+ '--jq', '.ahead_by'
+ ])
+ if not out:
+ continue
+ try:
+ commits_behind = int(out)
+ except (ValueError, TypeError):
+ continue
+
+ days_behind = None
+ out2 = _gh([
+ 'api', f'repos/{REPO}/compare/{target}...{source}',
+ '--jq', '.commits[0].commit.committer.date'
+ ])
+ if out2:
+ try:
+ oldest = datetime.fromisoformat(out2.replace('Z', '+00:00'))
+ days_behind = round((datetime.now(timezone.utc) - oldest).total_seconds() / 86400, 1)
+ except (ValueError, TypeError):
+ pass
+
+ results.append({
+ 'date': today,
+ 'source': source,
+ 'target': target,
+ 'commits_behind': commits_behind,
+ 'days_behind': days_behind,
+ })
+ return results
+
+
+def _ensure_lag():
+    """Refresh the branch-lag cache when its TTL has expired.
+
+    Non-blocking on contention: if another thread is already refreshing,
+    return and serve the current (possibly stale) data.
+    """
+    now = time.time()
+    if _lag_cache['data'] and now - _lag_cache['ts'] < _CACHE_TTL:
+        return
+    if not _lag_lock.acquire(blocking=False):
+        return
+    try:
+        lag = _fetch_branch_lag()
+        # Keep the old snapshot if the fetch returned nothing.
+        if lag:
+            _lag_cache['data'] = lag
+            _lag_cache['ts'] = now
+    finally:
+        _lag_lock.release()
+
+
+# ---- Query functions for API endpoints ----
+
+def get_deployment_speed(date_from: str, date_to: str, workflow: str = '') -> dict:
+    """Deployment duration/success stats in [date_from, date_to] (YYYY-MM-DD).
+
+    Args:
+        date_from: inclusive start date.
+        date_to: inclusive end date.
+        workflow: optional workflow_name filter (e.g. 'deploy-network').
+
+    Returns:
+        {'by_date': per-day median/p95/counts, 'summary': overall stats,
+         'recent': up to 50 most recently started runs}.
+    """
+    # Block only when the cache is empty; otherwise refresh in the background.
+    if not _deploy_cache['data']:
+        _ensure_deploys()
+    else:
+        threading.Thread(target=_ensure_deploys, daemon=True).start()
+    deploys = [d for d in _deploy_cache['data']
+               if d.get('started_date') and date_from <= d['started_date'] <= date_to]
+    if workflow:
+        deploys = [d for d in deploys if d['workflow_name'] == workflow]
+
+    # Group by date
+    by_date_map = {}
+    for d in deploys:
+        date = d['started_date']
+        if date not in by_date_map:
+            by_date_map[date] = {'durations': [], 'success': 0, 'failure': 0, 'count': 0}
+        by_date_map[date]['count'] += 1
+        if d['duration_secs'] is not None:
+            by_date_map[date]['durations'].append(d['duration_secs'] / 60.0)
+        if d['status'] == 'success':
+            by_date_map[date]['success'] += 1
+        elif d['status'] == 'failure':
+            by_date_map[date]['failure'] += 1
+
+    by_date = []
+    for date in sorted(by_date_map):
+        b = by_date_map[date]
+        durs = sorted(b['durations'])
+        # Simple index-based median/p95 (int(len*0.95) < len for len >= 1).
+        by_date.append({
+            'date': date,
+            'median_mins': round(durs[len(durs)//2], 1) if durs else None,
+            'p95_mins': round(durs[int(len(durs)*0.95)], 1) if durs else None,
+            'count': b['count'],
+            'success': b['success'],
+            'failure': b['failure'],
+        })
+
+    all_durs = sorted([d['duration_secs']/60.0 for d in deploys if d['duration_secs'] is not None])
+    total = len(deploys)
+    success = sum(1 for d in deploys if d['status'] == 'success')
+
+    # NOTE(review): the truthiness check below maps a 0.0-second duration to
+    # None in 'recent' — presumably acceptable, but confirm.
+    recent = [{'run_id': d['run_id'], 'workflow_name': d['workflow_name'],
+               'status': d['status'], 'duration_mins': round(d['duration_secs']/60.0, 1) if d['duration_secs'] else None,
+               'started_at': d['started_at'], 'ref_name': d['ref_name']}
+              for d in sorted(deploys, key=lambda x: x['started_at'], reverse=True)[:50]]
+
+    return {
+        'by_date': by_date,
+        'summary': {
+            'median_mins': round(all_durs[len(all_durs)//2], 1) if all_durs else None,
+            'p95_mins': round(all_durs[int(len(all_durs)*0.95)], 1) if all_durs else None,
+            'success_rate': round(100.0 * success / max(total, 1), 1),
+            'total': total,
+        },
+        'recent': recent,
+    }
+
+
+def get_branch_lag(date_from: str, date_to: str) -> dict:
+    """Current and historical lag for each tracked branch pair.
+
+    NOTE(review): date_from/date_to are accepted for API symmetry but are not
+    used — 'history' is whatever snapshots the in-memory cache holds.
+
+    Returns:
+        {'pairs': [{'source', 'target', 'current': {...}, 'history': [...]}]}
+    """
+    # Block only when the cache is empty; otherwise refresh in the background.
+    if not _lag_cache['data']:
+        _ensure_lag()
+    else:
+        threading.Thread(target=_ensure_lag, daemon=True).start()
+    pairs = []
+    for source, target in BRANCH_PAIRS:
+        matching = [l for l in _lag_cache['data']
+                    if l['source'] == source and l['target'] == target]
+        # Most recent snapshot wins; zeros if the pair was never fetched.
+        current = matching[-1] if matching else {'commits_behind': 0, 'days_behind': 0}
+        pairs.append({
+            'source': source,
+            'target': target,
+            'current': {'commits_behind': current.get('commits_behind', 0),
+                        'days_behind': current.get('days_behind', 0)},
+            'history': [{'date': l['date'], 'commits_behind': l['commits_behind'],
+                         'days_behind': l['days_behind']} for l in matching],
+        })
+    return {'pairs': pairs}
+
+
+def get_pr_author(pr_number) -> dict | None:
+ """Look up PR author/title by number. Results are cached permanently (PR data doesn't change)."""
+ pr_number = int(pr_number) if pr_number else None
+ if not pr_number:
+ return None
+ if pr_number in _pr_author_cache:
+ return _pr_author_cache[pr_number]
+
+ # Check merged PR cache first (already fetched)
+ for pr in _pr_cache.get('data', []):
+ if pr.get('number') == pr_number:
+ info = {'author': pr.get('author', 'unknown'), 'title': pr.get('title', ''),
+ 'branch': pr.get('headRefName', ''),
+ 'additions': pr.get('additions', 0), 'deletions': pr.get('deletions', 0)}
+ _pr_author_cache[pr_number] = info
+ return info
+
+ # Fetch from GitHub API
+ out = _gh(['pr', 'view', str(pr_number), '--repo', REPO,
+ '--json', 'author,title,headRefName,additions,deletions'])
+ if out:
+ try:
+ data = json.loads(out)
+ author = data.get('author', {})
+ if isinstance(author, dict):
+ author = author.get('login', 'unknown')
+ info = {'author': author, 'title': data.get('title', ''),
+ 'branch': data.get('headRefName', ''),
+ 'additions': data.get('additions', 0), 'deletions': data.get('deletions', 0)}
+ _pr_author_cache[pr_number] = info
+ return info
+ except (json.JSONDecodeError, KeyError):
+ pass
+ return None
+
+
+def batch_get_pr_authors(pr_numbers: set) -> dict:
+ """Fetch authors for multiple PR numbers, using cache. Returns {pr_number: info}."""
+ result = {}
+ to_fetch = []
+ for prn in pr_numbers:
+ if not prn:
+ continue
+ prn = int(prn)
+ if prn in _pr_author_cache:
+ result[prn] = _pr_author_cache[prn]
+ else:
+ to_fetch.append(prn)
+
+ # Check merged PR cache first
+ for pr in _pr_cache.get('data', []):
+ num = pr.get('number')
+ if num in to_fetch:
+ info = {'author': pr.get('author', 'unknown'), 'title': pr.get('title', ''),
+ 'branch': pr.get('headRefName', ''),
+ 'additions': pr.get('additions', 0), 'deletions': pr.get('deletions', 0)}
+ _pr_author_cache[num] = info
+ result[num] = info
+ to_fetch.remove(num)
+
+ # Fetch remaining individually (with a cap to avoid API abuse)
+ for prn in to_fetch[:50]:
+ info = get_pr_author(prn)
+ if info:
+ result[prn] = info
+
+ return result
+
+
+def get_branch_pr_map() -> dict:
+ """Return {branch_name: pr_number} from the PR cache. Call _ensure_prs first."""
+ if not _pr_cache['data']:
+ _ensure_prs()
+ else:
+ threading.Thread(target=_ensure_prs, daemon=True).start()
+ return {pr['headRefName']: pr['number']
+ for pr in _pr_cache.get('data', [])
+ if pr.get('headRefName')}
+
+
+def get_pr_metrics(date_from: str, date_to: str, author: str = '',
+                   ci_runs: list = None) -> dict:
+    """PR merge-time / CI-cost metrics for PRs merged in [date_from, date_to].
+
+    Args:
+        date_from: inclusive start date (YYYY-MM-DD).
+        date_to: inclusive end date (YYYY-MM-DD).
+        author: optional author filter — applies to by_date and summary only;
+            by_author always covers all PRs in range.
+        ci_runs: CI run dicts passed by the caller (read from Redis). Each may
+            carry pr_number, cost_usd, complete, timestamp.
+
+    Returns:
+        {'by_date': [...], 'by_author': [...], 'summary': {...}}.
+    """
+    # Block only when the cache is empty; otherwise refresh in the background.
+    if not _pr_cache['data']:
+        _ensure_prs()
+    else:
+        threading.Thread(target=_ensure_prs, daemon=True).start()
+
+    prs = [p for p in _pr_cache['data']
+           if p.get('merged_date') and date_from <= p['merged_date'] <= date_to]
+    if author:
+        prs = [p for p in prs if p.get('author') == author]
+
+    # Compute per-PR CI cost and duration from ci_runs
+    pr_costs = {}
+    pr_run_counts = {}
+    pr_ci_time = {}  # total CI compute hours per PR
+    if ci_runs:
+        for run in ci_runs:
+            prn = run.get('pr_number')
+            if not prn:
+                continue
+            if run.get('cost_usd') is not None:
+                pr_costs[prn] = pr_costs.get(prn, 0) + run['cost_usd']
+            pr_run_counts[prn] = pr_run_counts.get(prn, 0) + 1
+            c = run.get('complete')
+            t = run.get('timestamp')
+            if c and t:
+                # complete/timestamp appear to be epoch milliseconds (divided
+                # by 3.6e6 to get hours) — NOTE(review): confirm with producer.
+                pr_ci_time[prn] = pr_ci_time.get(prn, 0) + (c - t) / 3_600_000
+
+    # Attach the per-PR aggregates to each PR dict (mutates cached entries).
+    for pr in prs:
+        prn = pr.get('number')
+        pr['ci_cost_usd'] = round(pr_costs.get(prn, 0), 2)
+        pr['ci_runs_count'] = pr_run_counts.get(prn, 0)
+        pr['ci_time_hrs'] = round(pr_ci_time.get(prn, 0), 2)
+
+    # Group by date
+    by_date_map = {}
+    for pr in prs:
+        date = pr['merged_date']
+        if date not in by_date_map:
+            by_date_map[date] = {'costs': [], 'merge_times': [], 'ci_times': [],
+                                 'run_counts': [], 'count': 0}
+        by_date_map[date]['count'] += 1
+        by_date_map[date]['costs'].append(pr['ci_cost_usd'])
+        by_date_map[date]['ci_times'].append(pr.get('ci_time_hrs', 0))
+        by_date_map[date]['run_counts'].append(pr.get('ci_runs_count', 0))
+        if pr.get('merge_time_hrs') is not None:
+            by_date_map[date]['merge_times'].append(pr['merge_time_hrs'])
+
+    def _median(vals):
+        # Standard median: middle element (odd n) or mean of the two middles
+        # (even n); None for an empty list.
+        s = sorted(vals)
+        n = len(s)
+        if n == 0:
+            return None
+        if n % 2 == 1:
+            return s[n // 2]
+        return (s[n // 2 - 1] + s[n // 2]) / 2
+
+    by_date = []
+    for d, v in sorted(by_date_map.items()):
+        by_date.append({
+            'date': d,
+            'pr_count': v['count'],
+            'avg_cost': round(sum(v['costs']) / max(len(v['costs']), 1), 2),
+            'median_merge_time_hrs': round(_median(v['merge_times']), 1) if v['merge_times'] else None,
+            'avg_ci_time_hrs': round(sum(v['ci_times']) / max(len(v['ci_times']), 1), 2),
+            'avg_runs': round(sum(v['run_counts']) / max(len(v['run_counts']), 1), 1),
+        })
+
+    # By author (all PRs in range, not filtered by author)
+    all_prs_in_range = [p for p in _pr_cache['data']
+                        if p.get('merged_date') and date_from <= p['merged_date'] <= date_to]
+
+    author_map = {}
+    for pr in all_prs_in_range:
+        prn = pr.get('number')
+        a = pr.get('author', 'unknown')
+        if a not in author_map:
+            author_map[a] = {'total_cost': 0, 'pr_count': 0, 'merge_times': [],
+                             'total_ci_time': 0, 'total_runs': 0}
+        author_map[a]['total_cost'] += round(pr_costs.get(prn, 0), 2)
+        author_map[a]['pr_count'] += 1
+        author_map[a]['total_ci_time'] += round(pr_ci_time.get(prn, 0), 2)
+        author_map[a]['total_runs'] += pr_run_counts.get(prn, 0)
+        if pr.get('merge_time_hrs') is not None:
+            author_map[a]['merge_times'].append(pr['merge_time_hrs'])
+
+    # Top 20 authors by total CI cost, descending.
+    by_author = []
+    for a, v in sorted(author_map.items(), key=lambda x: -x[1]['total_cost'])[:20]:
+        by_author.append({
+            'author': a,
+            'total_cost': round(v['total_cost'], 2),
+            'pr_count': v['pr_count'],
+            # Named "avg" but computed as a median of merge times.
+            'avg_merge_time_hrs': round(_median(v['merge_times']), 1) if v['merge_times'] else None,
+            'avg_ci_time_hrs': round(v['total_ci_time'] / max(v['pr_count'], 1), 2),
+            'avg_runs_per_pr': round(v['total_runs'] / max(v['pr_count'], 1), 1),
+        })
+
+    all_costs = [p.get('ci_cost_usd', 0) for p in prs]
+    all_merge = [p['merge_time_hrs'] for p in prs if p.get('merge_time_hrs') is not None]
+    all_run_counts = [p.get('ci_runs_count', 0) for p in prs]
+    all_ci_times = [p.get('ci_time_hrs', 0) for p in prs]
+
+    return {
+        'by_date': by_date,
+        'by_author': by_author,
+        'summary': {
+            'avg_cost_per_pr': round(sum(all_costs)/max(len(all_costs),1), 2) if all_costs else 0,
+            'median_merge_time_hrs': round(_median(all_merge), 1) if all_merge else None,
+            'total_prs': len(prs),
+            'total_cost': round(sum(all_costs), 2),
+            'avg_ci_runs_per_pr': round(sum(all_run_counts)/max(len(all_run_counts),1), 1) if all_run_counts else 0,
+            'avg_ci_time_hrs': round(sum(all_ci_times)/max(len(all_ci_times),1), 2) if all_ci_times else 0,
+        },
+    }
+
+
+# ---- Merge queue failure rate ----
+
+CI3_WORKFLOW = 'ci3.yml'
+
+def _fetch_merge_queue_runs(date_str: str) -> dict:
+    """Fetch merge_group workflow runs for a single date. Returns daily summary.
+
+    Queries the paginated Actions API for CI3_WORKFLOW runs created on
+    date_str with event=merge_group; each jq output line is a tab-separated
+    (conclusion, status) pair tallied into the summary buckets.
+    """
+    out = _gh([
+        'api', '--paginate',
+        f'repos/{REPO}/actions/workflows/{CI3_WORKFLOW}/runs'
+        f'?event=merge_group&created={date_str}&per_page=100',
+        '--jq', '.workflow_runs[] | [.conclusion, .status] | @tsv',
+    ])
+    summary = {'date': date_str, 'total': 0, 'success': 0, 'failure': 0,
+               'cancelled': 0, 'in_progress': 0}
+    if not out:
+        return summary
+    for line in out.strip().split('\n'):
+        if not line.strip():
+            continue
+        parts = line.split('\t')
+        # conclusion is empty until a run completes.
+        conclusion = parts[0] if parts[0] else ''
+        status = parts[1] if len(parts) > 1 else ''
+        summary['total'] += 1
+        if conclusion == 'success':
+            summary['success'] += 1
+        elif conclusion == 'failure':
+            summary['failure'] += 1
+        elif conclusion == 'cancelled':
+            summary['cancelled'] += 1
+        elif status in ('in_progress', 'queued', 'waiting'):
+            summary['in_progress'] += 1
+        else:
+            summary['failure'] += 1  # treat unknown conclusions as failures
+    return summary
+
+
+def _load_backfill_json():
+ """Load seed data from merge-queue-backfill.json if SQLite is empty."""
+ import db
+ from pathlib import Path
+ conn = db.get_db()
+
+ count = conn.execute('SELECT COUNT(*) as c FROM merge_queue_daily').fetchone()['c']
+ if count > 0:
+ return
+
+ seed = Path(__file__).parent / 'merge-queue-backfill.json'
+ if not seed.exists():
+ return
+
+ import json
+ with seed.open() as f:
+ data = json.load(f)
+
+ print(f"[rk_github] Loading {len(data)} days from merge-queue-backfill.json...")
+ for ds, summary in data.items():
+ conn.execute(
+ 'INSERT OR REPLACE INTO merge_queue_daily (date, total, success, failure, cancelled, in_progress) '
+ 'VALUES (?, ?, ?, ?, ?, ?)',
+ (ds, summary['total'], summary['success'], summary['failure'],
+ summary['cancelled'], summary['in_progress']))
+ conn.commit()
+
+
+def _backfill_merge_queue():
+ """Backfill missing merge queue daily stats into SQLite."""
+ import db
+ conn = db.get_db()
+
+ # Load seed data on first run
+ _load_backfill_json()
+
+ # Find which dates we already have
+ existing = {row['date'] for row in
+ conn.execute('SELECT date FROM merge_queue_daily').fetchall()}
+
+ yesterday = (datetime.now(timezone.utc) - timedelta(days=1)).date()
+ # Backfill up to 365 days
+ start = yesterday - timedelta(days=365)
+ current = start
+
+ missing = []
+ while current <= yesterday:
+ ds = current.isoformat()
+ if ds not in existing:
+ missing.append(ds)
+ current += timedelta(days=1)
+
+ if not missing:
+ return
+
+ print(f"[rk_github] Backfilling {len(missing)} days of merge queue stats...")
+ for ds in missing:
+ summary = _fetch_merge_queue_runs(ds)
+ if summary['total'] == 0:
+ conn.execute(
+ 'INSERT OR REPLACE INTO merge_queue_daily (date, total, success, failure, cancelled, in_progress) '
+ 'VALUES (?, 0, 0, 0, 0, 0)', (ds,))
+ else:
+ conn.execute(
+ 'INSERT OR REPLACE INTO merge_queue_daily (date, total, success, failure, cancelled, in_progress) '
+ 'VALUES (?, ?, ?, ?, ?, ?)',
+ (ds, summary['total'], summary['success'], summary['failure'],
+ summary['cancelled'], summary['in_progress']))
+ conn.commit()
+
+
+def refresh_merge_queue_today():
+ """Refresh today's (and yesterday's) merge queue stats. Called periodically."""
+ import db
+ conn = db.get_db()
+ today = datetime.now(timezone.utc).date().isoformat()
+ yesterday = (datetime.now(timezone.utc) - timedelta(days=1)).date().isoformat()
+
+ for ds in [yesterday, today]:
+ summary = _fetch_merge_queue_runs(ds)
+ conn.execute(
+ 'INSERT OR REPLACE INTO merge_queue_daily (date, total, success, failure, cancelled, in_progress) '
+ 'VALUES (?, ?, ?, ?, ?, ?)',
+ (ds, summary['total'], summary['success'], summary['failure'],
+ summary['cancelled'], summary['in_progress']))
+ conn.commit()
+
+
+# Module-level throttle state for the merge-queue refresh.
+_mq_backfill_lock = threading.Lock()
+# Epoch seconds of the last successful refresh.
+_mq_last_refresh = 0
+_MQ_REFRESH_TTL = 3600  # refresh today's data every hour
+
+
+def ensure_merge_queue_data():
+    """Ensure merge queue data is backfilled and today is fresh.
+
+    Throttled to once per _MQ_REFRESH_TTL; non-blocking if another thread is
+    already refreshing.
+    """
+    global _mq_last_refresh
+    now = time.time()
+    if now - _mq_last_refresh < _MQ_REFRESH_TTL:
+        return
+    if not _mq_backfill_lock.acquire(blocking=False):
+        return
+    try:
+        _backfill_merge_queue()
+        refresh_merge_queue_today()
+        # Only advanced after both steps complete; an exception leaves the
+        # timestamp unchanged so the next call retries.
+        _mq_last_refresh = now
+    finally:
+        _mq_backfill_lock.release()
+
+
+def get_merge_queue_stats(date_from: str, date_to: str) -> dict:
+    """Get merge queue failure rate by day. Triggers backfill if needed.
+
+    Args:
+        date_from: inclusive start date (YYYY-MM-DD).
+        date_to: inclusive end date (YYYY-MM-DD).
+
+    Returns:
+        {'by_date': daily rows, 'summary': totals + failure_rate percent +
+         count of days that had at least one run}.
+    """
+    # Ensure data is populated: block when the table is empty, otherwise
+    # refresh in a background thread and serve what we have.
+    import db
+    conn = db.get_db()
+    count = conn.execute('SELECT COUNT(*) as c FROM merge_queue_daily').fetchone()['c']
+    if count == 0:
+        ensure_merge_queue_data()  # block on first load
+    else:
+        threading.Thread(target=ensure_merge_queue_data, daemon=True).start()
+
+    rows = db.query(
+        'SELECT date, total, success, failure, cancelled, in_progress '
+        'FROM merge_queue_daily WHERE date >= ? AND date <= ? ORDER BY date',
+        (date_from, date_to))
+
+    total_runs = sum(r['total'] for r in rows)
+    total_fail = sum(r['failure'] for r in rows)
+    total_success = sum(r['success'] for r in rows)
+
+    return {
+        'by_date': rows,
+        'summary': {
+            'total_runs': total_runs,
+            'total_success': total_success,
+            'total_failure': total_fail,
+            'failure_rate': round(total_fail / max(total_runs, 1) * 100, 1),
+            'days': len([r for r in rows if r['total'] > 0]),
+        },
+    }
diff --git a/ci3/ci-metrics/merge-queue-backfill.json b/ci3/ci-metrics/merge-queue-backfill.json
new file mode 100644
index 000000000000..079077590581
--- /dev/null
+++ b/ci3/ci-metrics/merge-queue-backfill.json
@@ -0,0 +1,2564 @@
+{
+ "2025-02-10": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-02-11": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-02-12": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-02-13": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-02-14": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-02-15": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-02-16": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-02-17": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-02-18": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-02-19": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-02-20": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-02-21": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-02-22": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-02-23": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-02-24": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-02-25": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-02-26": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-02-27": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-02-28": {
+ "total": 2,
+ "success": 2,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-01": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-02": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-03": {
+ "total": 1,
+ "success": 0,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-04": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-05": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-06": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-07": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-08": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-09": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-10": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-11": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-12": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-13": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-14": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-15": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-16": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-17": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-18": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-19": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-20": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-21": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-22": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-23": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-24": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-25": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-26": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-27": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-28": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-29": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-30": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-03-31": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-01": {
+ "total": 3,
+ "success": 2,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-02": {
+ "total": 31,
+ "success": 19,
+ "failure": 12,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-03": {
+ "total": 113,
+ "success": 58,
+ "failure": 55,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-04": {
+ "total": 69,
+ "success": 50,
+ "failure": 19,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-05": {
+ "total": 4,
+ "success": 4,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-06": {
+ "total": 1,
+ "success": 1,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-07": {
+ "total": 42,
+ "success": 32,
+ "failure": 10,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-08": {
+ "total": 27,
+ "success": 19,
+ "failure": 8,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-09": {
+ "total": 29,
+ "success": 26,
+ "failure": 3,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-10": {
+ "total": 42,
+ "success": 35,
+ "failure": 7,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-11": {
+ "total": 51,
+ "success": 36,
+ "failure": 15,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-12": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-13": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-14": {
+ "total": 24,
+ "success": 19,
+ "failure": 4,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-04-15": {
+ "total": 41,
+ "success": 22,
+ "failure": 19,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-16": {
+ "total": 26,
+ "success": 21,
+ "failure": 5,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-17": {
+ "total": 29,
+ "success": 28,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-18": {
+ "total": 10,
+ "success": 10,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-19": {
+ "total": 4,
+ "success": 4,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-20": {
+ "total": 2,
+ "success": 2,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-21": {
+ "total": 5,
+ "success": 5,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-22": {
+ "total": 49,
+ "success": 33,
+ "failure": 15,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-04-23": {
+ "total": 32,
+ "success": 28,
+ "failure": 4,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-24": {
+ "total": 29,
+ "success": 26,
+ "failure": 3,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-25": {
+ "total": 28,
+ "success": 26,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-26": {
+ "total": 1,
+ "success": 1,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-27": {
+ "total": 1,
+ "success": 1,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-28": {
+ "total": 26,
+ "success": 20,
+ "failure": 6,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-29": {
+ "total": 60,
+ "success": 26,
+ "failure": 34,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-04-30": {
+ "total": 47,
+ "success": 33,
+ "failure": 14,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-01": {
+ "total": 31,
+ "success": 27,
+ "failure": 4,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-02": {
+ "total": 8,
+ "success": 8,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-03": {
+ "total": 2,
+ "success": 2,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-04": {
+ "total": 7,
+ "success": 7,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-05": {
+ "total": 14,
+ "success": 11,
+ "failure": 3,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-06": {
+ "total": 18,
+ "success": 16,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-07": {
+ "total": 22,
+ "success": 20,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-08": {
+ "total": 18,
+ "success": 15,
+ "failure": 3,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-09": {
+ "total": 36,
+ "success": 27,
+ "failure": 9,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-10": {
+ "total": 2,
+ "success": 1,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-11": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-12": {
+ "total": 47,
+ "success": 30,
+ "failure": 17,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-13": {
+ "total": 134,
+ "success": 65,
+ "failure": 69,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-14": {
+ "total": 51,
+ "success": 34,
+ "failure": 17,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-15": {
+ "total": 22,
+ "success": 9,
+ "failure": 12,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-05-16": {
+ "total": 21,
+ "success": 15,
+ "failure": 6,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-17": {
+ "total": 2,
+ "success": 1,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-18": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-19": {
+ "total": 10,
+ "success": 9,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-20": {
+ "total": 30,
+ "success": 15,
+ "failure": 15,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-21": {
+ "total": 26,
+ "success": 12,
+ "failure": 14,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-22": {
+ "total": 51,
+ "success": 21,
+ "failure": 30,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-23": {
+ "total": 67,
+ "success": 13,
+ "failure": 53,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-05-24": {
+ "total": 5,
+ "success": 2,
+ "failure": 3,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-25": {
+ "total": 5,
+ "success": 0,
+ "failure": 5,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-26": {
+ "total": 10,
+ "success": 7,
+ "failure": 3,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-27": {
+ "total": 61,
+ "success": 12,
+ "failure": 49,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-28": {
+ "total": 56,
+ "success": 15,
+ "failure": 41,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-29": {
+ "total": 77,
+ "success": 24,
+ "failure": 52,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-05-30": {
+ "total": 25,
+ "success": 15,
+ "failure": 10,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-05-31": {
+ "total": 6,
+ "success": 3,
+ "failure": 3,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-01": {
+ "total": 2,
+ "success": 2,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-02": {
+ "total": 50,
+ "success": 20,
+ "failure": 29,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-06-03": {
+ "total": 57,
+ "success": 22,
+ "failure": 35,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-04": {
+ "total": 219,
+ "success": 22,
+ "failure": 196,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-06-05": {
+ "total": 166,
+ "success": 19,
+ "failure": 147,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-06": {
+ "total": 73,
+ "success": 27,
+ "failure": 45,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-06-07": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-08": {
+ "total": 1,
+ "success": 1,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-09": {
+ "total": 124,
+ "success": 31,
+ "failure": 93,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-10": {
+ "total": 44,
+ "success": 29,
+ "failure": 15,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-11": {
+ "total": 19,
+ "success": 16,
+ "failure": 3,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-12": {
+ "total": 26,
+ "success": 14,
+ "failure": 12,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-13": {
+ "total": 29,
+ "success": 24,
+ "failure": 5,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-14": {
+ "total": 2,
+ "success": 2,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-15": {
+ "total": 1,
+ "success": 0,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-16": {
+ "total": 44,
+ "success": 21,
+ "failure": 23,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-17": {
+ "total": 29,
+ "success": 15,
+ "failure": 14,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-18": {
+ "total": 38,
+ "success": 25,
+ "failure": 13,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-19": {
+ "total": 15,
+ "success": 11,
+ "failure": 4,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-20": {
+ "total": 27,
+ "success": 21,
+ "failure": 6,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-21": {
+ "total": 1,
+ "success": 1,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-22": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-23": {
+ "total": 30,
+ "success": 14,
+ "failure": 16,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-24": {
+ "total": 26,
+ "success": 17,
+ "failure": 9,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-25": {
+ "total": 26,
+ "success": 20,
+ "failure": 6,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-26": {
+ "total": 44,
+ "success": 21,
+ "failure": 22,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-06-27": {
+ "total": 18,
+ "success": 13,
+ "failure": 5,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-28": {
+ "total": 1,
+ "success": 1,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-29": {
+ "total": 3,
+ "success": 3,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-06-30": {
+ "total": 27,
+ "success": 17,
+ "failure": 10,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-01": {
+ "total": 26,
+ "success": 12,
+ "failure": 13,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-07-02": {
+ "total": 42,
+ "success": 25,
+ "failure": 17,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-03": {
+ "total": 17,
+ "success": 12,
+ "failure": 5,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-04": {
+ "total": 15,
+ "success": 12,
+ "failure": 3,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-05": {
+ "total": 4,
+ "success": 3,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-06": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-07": {
+ "total": 20,
+ "success": 14,
+ "failure": 6,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-08": {
+ "total": 33,
+ "success": 19,
+ "failure": 14,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-09": {
+ "total": 19,
+ "success": 13,
+ "failure": 6,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-10": {
+ "total": 22,
+ "success": 14,
+ "failure": 7,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-07-11": {
+ "total": 6,
+ "success": 6,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-12": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-13": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-14": {
+ "total": 29,
+ "success": 21,
+ "failure": 8,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-15": {
+ "total": 49,
+ "success": 22,
+ "failure": 27,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-16": {
+ "total": 47,
+ "success": 21,
+ "failure": 26,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-17": {
+ "total": 18,
+ "success": 10,
+ "failure": 8,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-18": {
+ "total": 13,
+ "success": 12,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-19": {
+ "total": 2,
+ "success": 2,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-20": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-21": {
+ "total": 26,
+ "success": 22,
+ "failure": 4,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-22": {
+ "total": 25,
+ "success": 19,
+ "failure": 6,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-23": {
+ "total": 33,
+ "success": 16,
+ "failure": 15,
+ "cancelled": 2,
+ "in_progress": 0
+ },
+ "2025-07-24": {
+ "total": 61,
+ "success": 26,
+ "failure": 35,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-25": {
+ "total": 35,
+ "success": 17,
+ "failure": 16,
+ "cancelled": 2,
+ "in_progress": 0
+ },
+ "2025-07-26": {
+ "total": 1,
+ "success": 1,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-27": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-28": {
+ "total": 23,
+ "success": 22,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-29": {
+ "total": 52,
+ "success": 21,
+ "failure": 31,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-07-30": {
+ "total": 30,
+ "success": 15,
+ "failure": 14,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-07-31": {
+ "total": 35,
+ "success": 23,
+ "failure": 12,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-01": {
+ "total": 13,
+ "success": 13,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-02": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-03": {
+ "total": 4,
+ "success": 4,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-04": {
+ "total": 16,
+ "success": 15,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-05": {
+ "total": 14,
+ "success": 10,
+ "failure": 4,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-06": {
+ "total": 23,
+ "success": 16,
+ "failure": 7,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-07": {
+ "total": 19,
+ "success": 7,
+ "failure": 12,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-08": {
+ "total": 24,
+ "success": 15,
+ "failure": 9,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-09": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-10": {
+ "total": 4,
+ "success": 2,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-11": {
+ "total": 13,
+ "success": 12,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-12": {
+ "total": 9,
+ "success": 9,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-13": {
+ "total": 14,
+ "success": 12,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-14": {
+ "total": 18,
+ "success": 16,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-15": {
+ "total": 38,
+ "success": 30,
+ "failure": 8,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-16": {
+ "total": 2,
+ "success": 2,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-17": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-18": {
+ "total": 19,
+ "success": 12,
+ "failure": 7,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-19": {
+ "total": 11,
+ "success": 7,
+ "failure": 4,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-20": {
+ "total": 11,
+ "success": 9,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-21": {
+ "total": 19,
+ "success": 15,
+ "failure": 3,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-08-22": {
+ "total": 32,
+ "success": 24,
+ "failure": 8,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-23": {
+ "total": 6,
+ "success": 5,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-24": {
+ "total": 2,
+ "success": 2,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-25": {
+ "total": 13,
+ "success": 11,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-26": {
+ "total": 17,
+ "success": 10,
+ "failure": 7,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-27": {
+ "total": 20,
+ "success": 11,
+ "failure": 9,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-28": {
+ "total": 36,
+ "success": 18,
+ "failure": 17,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-08-29": {
+ "total": 39,
+ "success": 28,
+ "failure": 11,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-30": {
+ "total": 4,
+ "success": 2,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-08-31": {
+ "total": 1,
+ "success": 1,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-01": {
+ "total": 20,
+ "success": 15,
+ "failure": 5,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-02": {
+ "total": 25,
+ "success": 16,
+ "failure": 9,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-03": {
+ "total": 30,
+ "success": 19,
+ "failure": 11,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-04": {
+ "total": 29,
+ "success": 15,
+ "failure": 14,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-05": {
+ "total": 32,
+ "success": 14,
+ "failure": 18,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-06": {
+ "total": 1,
+ "success": 1,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-07": {
+ "total": 1,
+ "success": 1,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-08": {
+ "total": 18,
+ "success": 12,
+ "failure": 5,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-09-09": {
+ "total": 25,
+ "success": 14,
+ "failure": 11,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-10": {
+ "total": 38,
+ "success": 23,
+ "failure": 15,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-11": {
+ "total": 39,
+ "success": 18,
+ "failure": 21,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-12": {
+ "total": 34,
+ "success": 21,
+ "failure": 13,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-13": {
+ "total": 1,
+ "success": 0,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-14": {
+ "total": 1,
+ "success": 1,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-15": {
+ "total": 22,
+ "success": 11,
+ "failure": 11,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-16": {
+ "total": 25,
+ "success": 15,
+ "failure": 10,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-17": {
+ "total": 24,
+ "success": 17,
+ "failure": 7,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-18": {
+ "total": 24,
+ "success": 17,
+ "failure": 6,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-09-19": {
+ "total": 16,
+ "success": 9,
+ "failure": 7,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-20": {
+ "total": 8,
+ "success": 3,
+ "failure": 5,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-21": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-22": {
+ "total": 45,
+ "success": 19,
+ "failure": 26,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-23": {
+ "total": 23,
+ "success": 17,
+ "failure": 6,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-24": {
+ "total": 17,
+ "success": 13,
+ "failure": 4,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-25": {
+ "total": 47,
+ "success": 26,
+ "failure": 21,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-26": {
+ "total": 22,
+ "success": 21,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-27": {
+ "total": 4,
+ "success": 3,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-28": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-29": {
+ "total": 20,
+ "success": 12,
+ "failure": 8,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-09-30": {
+ "total": 46,
+ "success": 21,
+ "failure": 25,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-01": {
+ "total": 23,
+ "success": 16,
+ "failure": 6,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-10-02": {
+ "total": 30,
+ "success": 17,
+ "failure": 13,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-03": {
+ "total": 10,
+ "success": 9,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-04": {
+ "total": 4,
+ "success": 4,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-05": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-06": {
+ "total": 25,
+ "success": 9,
+ "failure": 15,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-10-07": {
+ "total": 42,
+ "success": 12,
+ "failure": 29,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-10-08": {
+ "total": 21,
+ "success": 11,
+ "failure": 10,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-09": {
+ "total": 61,
+ "success": 2,
+ "failure": 59,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-10": {
+ "total": 47,
+ "success": 13,
+ "failure": 34,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-11": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-12": {
+ "total": 1,
+ "success": 0,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-13": {
+ "total": 32,
+ "success": 18,
+ "failure": 14,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-14": {
+ "total": 31,
+ "success": 16,
+ "failure": 15,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-15": {
+ "total": 33,
+ "success": 22,
+ "failure": 11,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-16": {
+ "total": 19,
+ "success": 12,
+ "failure": 7,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-17": {
+ "total": 20,
+ "success": 12,
+ "failure": 7,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-10-18": {
+ "total": 1,
+ "success": 0,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-19": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-20": {
+ "total": 37,
+ "success": 14,
+ "failure": 23,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-21": {
+ "total": 21,
+ "success": 12,
+ "failure": 9,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-22": {
+ "total": 24,
+ "success": 11,
+ "failure": 13,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-23": {
+ "total": 61,
+ "success": 17,
+ "failure": 44,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-24": {
+ "total": 30,
+ "success": 18,
+ "failure": 12,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-25": {
+ "total": 3,
+ "success": 3,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-26": {
+ "total": 2,
+ "success": 2,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-27": {
+ "total": 9,
+ "success": 9,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-28": {
+ "total": 18,
+ "success": 16,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-29": {
+ "total": 19,
+ "success": 14,
+ "failure": 5,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-30": {
+ "total": 17,
+ "success": 16,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-10-31": {
+ "total": 15,
+ "success": 14,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-01": {
+ "total": 4,
+ "success": 1,
+ "failure": 3,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-02": {
+ "total": 2,
+ "success": 2,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-03": {
+ "total": 14,
+ "success": 13,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-04": {
+ "total": 19,
+ "success": 16,
+ "failure": 1,
+ "cancelled": 2,
+ "in_progress": 0
+ },
+ "2025-11-05": {
+ "total": 13,
+ "success": 10,
+ "failure": 3,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-06": {
+ "total": 24,
+ "success": 11,
+ "failure": 13,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-07": {
+ "total": 19,
+ "success": 14,
+ "failure": 5,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-08": {
+ "total": 3,
+ "success": 2,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-09": {
+ "total": 2,
+ "success": 1,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-10": {
+ "total": 47,
+ "success": 13,
+ "failure": 33,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-11-11": {
+ "total": 15,
+ "success": 11,
+ "failure": 4,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-12": {
+ "total": 42,
+ "success": 22,
+ "failure": 20,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-13": {
+ "total": 17,
+ "success": 12,
+ "failure": 4,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-11-14": {
+ "total": 22,
+ "success": 15,
+ "failure": 7,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-15": {
+ "total": 3,
+ "success": 3,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-16": {
+ "total": 3,
+ "success": 3,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-17": {
+ "total": 9,
+ "success": 7,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-18": {
+ "total": 19,
+ "success": 12,
+ "failure": 7,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-19": {
+ "total": 18,
+ "success": 13,
+ "failure": 5,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-20": {
+ "total": 9,
+ "success": 8,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-21": {
+ "total": 16,
+ "success": 12,
+ "failure": 3,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-11-22": {
+ "total": 5,
+ "success": 2,
+ "failure": 3,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-23": {
+ "total": 2,
+ "success": 2,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-24": {
+ "total": 8,
+ "success": 7,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-25": {
+ "total": 11,
+ "success": 10,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-26": {
+ "total": 17,
+ "success": 16,
+ "failure": 0,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-11-27": {
+ "total": 17,
+ "success": 15,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-28": {
+ "total": 11,
+ "success": 6,
+ "failure": 5,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-29": {
+ "total": 2,
+ "success": 2,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-11-30": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-01": {
+ "total": 13,
+ "success": 12,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-02": {
+ "total": 8,
+ "success": 8,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-03": {
+ "total": 17,
+ "success": 10,
+ "failure": 7,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-04": {
+ "total": 11,
+ "success": 8,
+ "failure": 3,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-05": {
+ "total": 12,
+ "success": 11,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-06": {
+ "total": 1,
+ "success": 1,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-07": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-08": {
+ "total": 17,
+ "success": 14,
+ "failure": 3,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-09": {
+ "total": 23,
+ "success": 14,
+ "failure": 9,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-10": {
+ "total": 43,
+ "success": 21,
+ "failure": 20,
+ "cancelled": 2,
+ "in_progress": 0
+ },
+ "2025-12-11": {
+ "total": 28,
+ "success": 19,
+ "failure": 9,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-12": {
+ "total": 14,
+ "success": 12,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-13": {
+ "total": 2,
+ "success": 0,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-14": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-15": {
+ "total": 41,
+ "success": 15,
+ "failure": 26,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-16": {
+ "total": 25,
+ "success": 21,
+ "failure": 4,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-17": {
+ "total": 10,
+ "success": 8,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-18": {
+ "total": 20,
+ "success": 14,
+ "failure": 5,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-12-19": {
+ "total": 13,
+ "success": 11,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-20": {
+ "total": 7,
+ "success": 3,
+ "failure": 4,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-21": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-22": {
+ "total": 20,
+ "success": 16,
+ "failure": 3,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2025-12-23": {
+ "total": 28,
+ "success": 19,
+ "failure": 9,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-24": {
+ "total": 13,
+ "success": 8,
+ "failure": 5,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-25": {
+ "total": 3,
+ "success": 1,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-26": {
+ "total": 6,
+ "success": 3,
+ "failure": 3,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-27": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-28": {
+ "total": 1,
+ "success": 1,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-29": {
+ "total": 4,
+ "success": 2,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-30": {
+ "total": 3,
+ "success": 1,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2025-12-31": {
+ "total": 2,
+ "success": 1,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-01": {
+ "total": 2,
+ "success": 1,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-02": {
+ "total": 12,
+ "success": 8,
+ "failure": 4,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-03": {
+ "total": 3,
+ "success": 1,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-04": {
+ "total": 3,
+ "success": 3,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-05": {
+ "total": 34,
+ "success": 27,
+ "failure": 7,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-06": {
+ "total": 45,
+ "success": 25,
+ "failure": 20,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-07": {
+ "total": 17,
+ "success": 13,
+ "failure": 4,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-08": {
+ "total": 36,
+ "success": 24,
+ "failure": 12,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-09": {
+ "total": 25,
+ "success": 17,
+ "failure": 7,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2026-01-10": {
+ "total": 5,
+ "success": 2,
+ "failure": 3,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-11": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-12": {
+ "total": 32,
+ "success": 17,
+ "failure": 15,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-13": {
+ "total": 44,
+ "success": 22,
+ "failure": 22,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-14": {
+ "total": 114,
+ "success": 32,
+ "failure": 82,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-15": {
+ "total": 54,
+ "success": 22,
+ "failure": 31,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2026-01-16": {
+ "total": 70,
+ "success": 27,
+ "failure": 40,
+ "cancelled": 3,
+ "in_progress": 0
+ },
+ "2026-01-17": {
+ "total": 6,
+ "success": 4,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-18": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-19": {
+ "total": 28,
+ "success": 25,
+ "failure": 3,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-20": {
+ "total": 42,
+ "success": 30,
+ "failure": 12,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-21": {
+ "total": 51,
+ "success": 31,
+ "failure": 20,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-22": {
+ "total": 32,
+ "success": 25,
+ "failure": 5,
+ "cancelled": 2,
+ "in_progress": 0
+ },
+ "2026-01-23": {
+ "total": 28,
+ "success": 25,
+ "failure": 3,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-24": {
+ "total": 6,
+ "success": 4,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-25": {
+ "total": 3,
+ "success": 2,
+ "failure": 1,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-26": {
+ "total": 89,
+ "success": 33,
+ "failure": 56,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-27": {
+ "total": 24,
+ "success": 21,
+ "failure": 2,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2026-01-28": {
+ "total": 48,
+ "success": 28,
+ "failure": 20,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-29": {
+ "total": 24,
+ "success": 18,
+ "failure": 6,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-30": {
+ "total": 31,
+ "success": 24,
+ "failure": 7,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-01-31": {
+ "total": 1,
+ "success": 1,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-02-01": {
+ "total": 0,
+ "success": 0,
+ "failure": 0,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-02-02": {
+ "total": 14,
+ "success": 12,
+ "failure": 2,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-02-03": {
+ "total": 27,
+ "success": 18,
+ "failure": 9,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-02-04": {
+ "total": 30,
+ "success": 16,
+ "failure": 14,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-02-05": {
+ "total": 33,
+ "success": 19,
+ "failure": 14,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-02-06": {
+ "total": 20,
+ "success": 15,
+ "failure": 5,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-02-07": {
+ "total": 8,
+ "success": 4,
+ "failure": 4,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-02-08": {
+ "total": 5,
+ "success": 2,
+ "failure": 2,
+ "cancelled": 1,
+ "in_progress": 0
+ },
+ "2026-02-09": {
+ "total": 15,
+ "success": 11,
+ "failure": 4,
+ "cancelled": 0,
+ "in_progress": 0
+ },
+ "2026-02-10": {
+ "total": 24,
+ "success": 20,
+ "failure": 4,
+ "cancelled": 0,
+ "in_progress": 0
+ }
+}
\ No newline at end of file
diff --git a/ci3/ci-metrics/metrics.py b/ci3/ci-metrics/metrics.py
new file mode 100644
index 000000000000..5c0d1610e06b
--- /dev/null
+++ b/ci3/ci-metrics/metrics.py
@@ -0,0 +1,602 @@
+"""CI metrics: direct Redis reads + test event listener.
+
+Reads CI run data directly from Redis sorted sets on each request.
+Test events stored in SQLite since they only arrive via pub/sub.
+CI runs periodically synced from Redis to SQLite for flake correlation.
+"""
+import json
+import re
+import time
+import threading
+from datetime import datetime, timedelta, timezone
+
+import db
+import github_data
+import ec2_pricing
+
# Dashboard sections; each one has a `ci-run-{section}` sorted set and a
# `failed_tests_{section}` list in Redis.
SECTIONS = ['next', 'prs', 'master', 'staging', 'releases', 'nightly', 'network', 'deflake', 'local']
+
+_PR_RE = re.compile(r'(?:pr-|#)(\d+)', re.IGNORECASE)
+_ANSI_RE = re.compile(r'\x1b\[[^m]*m|\x1b\]8;;[^\x07]*\x07')
+_URL_PR_RE = re.compile(r'/pull/(\d+)')
+
+
+def compute_run_cost(data: dict) -> float | None:
+ complete = data.get('complete')
+ ts = data.get('timestamp')
+ if not complete or not ts:
+ return None
+ hours = (complete - ts) / 3_600_000
+ instance_type = data.get('instance_type', 'unknown')
+ is_spot = bool(data.get('spot'))
+ rate = ec2_pricing.get_instance_rate(instance_type, is_spot)
+ if not rate:
+ vcpus = data.get('instance_vcpus', 192)
+ rate = vcpus * ec2_pricing.get_fallback_vcpu_rate(is_spot)
+ return round(hours * rate, 4)
+
+
+def extract_pr_number(name: str) -> int | None:
+ m = _PR_RE.search(name)
+ if m:
+ return int(m.group(1))
+ # Try matching GitHub PR URL in ANSI-encoded strings
+ m = _URL_PR_RE.search(name)
+ if m:
+ return int(m.group(1))
+ # Strip ANSI codes and retry
+ clean = _ANSI_RE.sub('', name)
+ m = _PR_RE.search(clean)
+ return int(m.group(1)) if m else None
+
+
def _get_ci_runs_from_redis(redis_conn, date_from_ms=None, date_to_ms=None):
    """Read CI runs from Redis sorted sets.

    Scans the `ci-run-{section}` sorted set for every section in SECTIONS,
    optionally restricted to the [date_from_ms, date_to_ms] score range
    (scores are millisecond timestamps). Each JSON entry is decoded and
    enriched with a cost estimate and a best-effort PR number.

    Returns a list of run dicts; malformed entries are skipped silently,
    unreadable keys are logged and skipped.
    """
    branch_pr_map = github_data.get_branch_pr_map()

    runs = []
    for section in SECTIONS:
        key = f'ci-run-{section}'
        try:
            if date_from_ms is not None or date_to_ms is not None:
                # Open-ended bounds fall back to Redis score infinities.
                lo = date_from_ms if date_from_ms is not None else '-inf'
                hi = date_to_ms if date_to_ms is not None else '+inf'
                entries = redis_conn.zrangebyscore(key, lo, hi, withscores=True)
            else:
                entries = redis_conn.zrange(key, 0, -1, withscores=True)
            for entry_bytes, score in entries:
                try:
                    raw = entry_bytes.decode() if isinstance(entry_bytes, bytes) else entry_bytes
                    data = json.loads(raw)
                    data.setdefault('dashboard', section)
                    data['cost_usd'] = compute_run_cost(data)
                    # PR number resolution order: run/branch name, commit
                    # message, the entry's own pr_number field, then the
                    # GitHub branch->PR map. A name-derived number takes
                    # precedence over a stored pr_number field.
                    data['pr_number'] = (
                        extract_pr_number(data.get('name', ''))
                        or extract_pr_number(data.get('msg', ''))
                        or (int(data['pr_number']) if data.get('pr_number') else None)
                        or branch_pr_map.get(data.get('name'))
                    )
                    runs.append(data)
                except Exception:
                    # Skip malformed entries rather than failing the whole read.
                    continue
        except Exception as e:
            print(f"[rk_metrics] Error reading {key}: {e}")
    return runs
+
+
def _get_ci_runs_from_sqlite(date_from_ms=None, date_to_ms=None):
    """Read CI runs from SQLite (persistent store).

    Optional millisecond-timestamp bounds are both inclusive. Rows are
    returned oldest-first, reshaped to match the dicts produced by
    _get_ci_runs_from_redis.
    """
    conditions = []
    params = []
    if date_from_ms is not None:
        conditions.append('timestamp_ms >= ?')
        params.append(date_from_ms)
    if date_to_ms is not None:
        conditions.append('timestamp_ms <= ?')
        params.append(date_to_ms)
    where = ('WHERE ' + ' AND '.join(conditions)) if conditions else ''
    rows = db.query(f'SELECT * FROM ci_runs {where} ORDER BY timestamp_ms', params)
    runs = []
    for row in rows:
        # NOTE(review): rows are accessed with both row['x'] and row.get('x');
        # .get requires dict-like rows (sqlite3.Row has no .get) — confirm
        # what db.query actually returns.
        runs.append({
            'dashboard': row['dashboard'],
            'name': row['name'],
            'timestamp': row['timestamp_ms'],
            'complete': row['complete_ms'],
            'status': row['status'],
            'author': row['author'],
            'pr_number': row['pr_number'],
            'instance_type': row['instance_type'],
            'instance_vcpus': row.get('instance_vcpus'),
            'spot': bool(row['spot']),
            'cost_usd': row['cost_usd'],
            'job_id': row.get('job_id', ''),
            'arch': row.get('arch', ''),
        })
    return runs
+
+
def get_ci_runs(redis_conn, date_from_ms=None, date_to_ms=None):
    """Merge CI runs from Redis with older runs persisted in SQLite.

    Redis is authoritative for recent data; SQLite fills in anything from
    before the oldest Redis entry (Redis data gets flushed over time).
    Entries are deduplicated on (dashboard, timestamp, name). Returned
    oldest-source-first: SQLite backfill, then Redis.
    """
    live = _get_ci_runs_from_redis(redis_conn, date_from_ms, date_to_ms)

    # Key set for dedup, plus the oldest timestamp Redis still holds.
    seen = set()
    oldest_live = float('inf')
    for entry in live:
        ts = entry.get('timestamp', 0)
        seen.add((entry.get('dashboard', ''), ts, entry.get('name', '')))
        if ts < oldest_live:
            oldest_live = ts

    # Backfill from SQLite only when the request reaches past Redis' data,
    # or when Redis returned nothing at all.
    archived = []
    if not live or (date_from_ms is not None and date_from_ms < oldest_live):
        upper = int(oldest_live) if live else date_to_ms
        for entry in _get_ci_runs_from_sqlite(date_from_ms, upper):
            key = (entry.get('dashboard', ''), entry.get('timestamp', 0), entry.get('name', ''))
            if key not in seen:
                archived.append(entry)

    return archived + live
+
+
+def _ts_to_date(ts_ms):
+ return datetime.fromtimestamp(ts_ms / 1000, tz=timezone.utc).strftime('%Y-%m-%d')
+
+
+# ---- Test event handling (only thing needing SQLite) ----
+
def _handle_test_event(channel: str, data: dict):
    """Persist one ci:test:* pub/sub event into the test_events table.

    The event status is the channel suffix (started/passed/failed/flaked).
    run_test_cmd historically published 'cmd'/'log_key' for failed/flaked
    events but 'test_cmd'/'log_url' for started events, so both spellings
    are accepted; bare log keys are expanded to full URLs.
    """
    status = channel.rsplit(':', 1)[-1]
    cmd = data.get('test_cmd') or data.get('cmd', '')
    url = data.get('log_url') or data.get('log_key')
    if url and not url.startswith('http'):
        url = f'http://ci.aztec-labs.com/{url}'
    row = (
        status,
        cmd,
        url,
        data.get('ref_name', ''),
        data.get('commit_hash'),
        data.get('commit_author'),
        data.get('commit_msg'),
        data.get('exit_code'),
        data.get('duration_seconds'),
        1 if data.get('is_scenario_test') else 0,
        json.dumps(data['owners']) if data.get('owners') else None,
        data.get('flake_group_id'),
        data.get('dashboard', ''),
        # Fall back to "now" when the publisher omitted a timestamp.
        data.get('timestamp', datetime.now(timezone.utc).isoformat()),
    )
    db.execute('''
        INSERT INTO test_events
        (status, test_cmd, log_url, ref_name, commit_hash, commit_author,
         commit_msg, exit_code, duration_secs, is_scenario, owners,
         flake_group_id, dashboard, timestamp)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ''', row)
+
+
def start_test_listener(redis_conn):
    """Run a daemon thread subscribed to the ci:test:* channels.

    Every received message is parsed as JSON and written to SQLite. The
    subscription is re-established with exponential backoff (capped at 60s)
    whenever the Redis connection drops. Returns the thread handle.
    """
    channels = [b'ci:test:started', b'ci:test:passed', b'ci:test:failed', b'ci:test:flaked']

    def _run():
        delay = 1
        while True:
            try:
                sub = redis_conn.pubsub()
                sub.subscribe(*channels)
                delay = 1  # connected again: reset backoff
                for msg in sub.listen():
                    if msg['type'] != 'message':
                        continue
                    chan = msg['channel']
                    if isinstance(chan, bytes):
                        chan = chan.decode()
                    try:
                        body = msg['data']
                        if isinstance(body, bytes):
                            body = body.decode()
                        _handle_test_event(chan, json.loads(body))
                    except Exception as e:
                        print(f"[rk_metrics] Error parsing test event: {e}")
            except Exception as e:
                print(f"[rk_metrics] Test listener error (reconnecting in {delay}s): {e}")
                time.sleep(delay)
                delay = min(delay * 2, 60)

    t = threading.Thread(target=_run, daemon=True, name='test-listener')
    t.start()
    return t
+
+
+# ---- Sync failed_tests_{section} lists from Redis into SQLite ----
+
+_ANSI_STRIP = re.compile(r'\x1b\[[^m]*m|\x1b\]8;;[^\x07]*\x07')
+_GRIND_CMD_RE = re.compile(r'/grind\?cmd=([^&\x07"]+)')
+_LOG_KEY_RE = re.compile(r'ci\.aztec-labs\.com/([a-f0-9]{16})')
+_INLINE_CMD_RE = re.compile(r'(?:grind\)|[0-9a-f]{16}\)):?\s+(.+?)\s+\(\d+s\)')
+_DURATION_RE = re.compile(r'\((\d+)s\)')
+_AUTHOR_MSG_RE = re.compile(r'\(code: \d+\)\s+\((.+?): (.+?)\)\s*$')
+_FLAKE_GROUP_RE = re.compile(r'group:(\S+)')
+
+_failed_tests_sync_ts = 0
+_FAILED_TESTS_SYNC_TTL = 3600 # 1 hour
+
+
+def _parse_failed_test_entry(raw: str, section: str) -> dict | None:
+ """Parse an ANSI-formatted failed_tests_{section} entry into structured data."""
+ from urllib.parse import unquote
+ clean = _ANSI_STRIP.sub('', raw)
+
+ # Status
+ if 'FLAKED' in clean:
+ status = 'flaked'
+ elif 'FAILED' in clean:
+ status = 'failed'
+ else:
+ return None
+
+ # Timestamp: "02-11 15:11:00: ..."
+ ts_match = re.match(r'(\d{2}-\d{2} \d{2}:\d{2}:\d{2})', clean)
+ if not ts_match:
+ return None
+ # Assume current year for MM-DD HH:MM:SS; handle year rollover
+ now = datetime.now(timezone.utc)
+ year = now.year
+ ts_str = f'{year}-{ts_match.group(1)}'
+ try:
+ parsed_dt = datetime.strptime(ts_str, '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
+ # If parsed date is in the future, it's from the previous year
+ if parsed_dt > now + timedelta(days=1):
+ parsed_dt = parsed_dt.replace(year=year - 1)
+ timestamp = parsed_dt.isoformat()
+ except ValueError:
+ return None
+
+ # Log key
+ log_key = None
+ m = _LOG_KEY_RE.search(raw)
+ if m:
+ log_key = m.group(1)
+
+ # Test command: try grind link first, then inline text
+ test_cmd = ''
+ m = _GRIND_CMD_RE.search(raw)
+ if m:
+ cmd_raw = unquote(m.group(1))
+ # Format: "hash:KEY=VAL:KEY=VAL actual_command"
+ # Strip the hash:KEY=VAL prefix to get the actual test command
+ parts = cmd_raw.split(' ', 1)
+ if len(parts) == 2 and ':' in parts[0]:
+ test_cmd = parts[1].strip()
+ else:
+ test_cmd = cmd_raw
+ else:
+ # Fallback: extract from inline text after log key
+ m = _INLINE_CMD_RE.search(clean)
+ if m:
+ test_cmd = m.group(1).strip()
+
+ # Duration
+ duration = None
+ m = _DURATION_RE.search(clean)
+ if m:
+ duration = float(m.group(1))
+
+ # Author and commit message
+ author, msg = None, None
+ m = _AUTHOR_MSG_RE.search(clean)
+ if m:
+ author = m.group(1)
+ msg = m.group(2)
+
+ # Flake group
+ flake_group = None
+ m = _FLAKE_GROUP_RE.search(clean)
+ if m:
+ flake_group = m.group(1)
+
+ return {
+ 'status': status,
+ 'test_cmd': test_cmd,
+ 'log_url': f'http://ci.aztec-labs.com/{log_key}' if log_key else None,
+ 'log_key': log_key,
+ 'ref_name': section, # section is the best ref we have from these lists
+ 'commit_author': author,
+ 'commit_msg': msg,
+ 'duration_secs': duration,
+ 'flake_group_id': flake_group,
+ 'timestamp': timestamp,
+ 'dashboard': section,
+ }
+
+
def sync_failed_tests_to_sqlite(redis_conn):
    """Read failed_tests_{section} lists from Redis and insert into test_events.

    TTL-gated to at most one sync per _FAILED_TESTS_SYNC_TTL seconds.
    Entries are parsed from their ANSI console form; duplicates are skipped
    using log_url when present, otherwise the composite key
    (test_cmd, timestamp, dashboard).
    """
    global _failed_tests_sync_ts
    now = time.time()
    if now - _failed_tests_sync_ts < _FAILED_TESTS_SYNC_TTL:
        return
    _failed_tests_sync_ts = now

    conn = db.get_db()
    # Track existing entries to avoid duplicates: log_url for entries that have one,
    # (test_cmd, timestamp, dashboard) composite key for entries without log_url
    existing_urls = {row['log_url'] for row in conn.execute(
        "SELECT DISTINCT log_url FROM test_events WHERE log_url IS NOT NULL"
    ).fetchall()}
    existing_keys = {(row['test_cmd'], row['timestamp'], row['dashboard']) for row in conn.execute(
        "SELECT test_cmd, timestamp, dashboard FROM test_events WHERE log_url IS NULL"
    ).fetchall()}

    total = 0
    for section in SECTIONS:
        key = f'failed_tests_{section}'
        try:
            entries = redis_conn.lrange(key, 0, -1)
        except Exception as e:
            print(f"[rk_metrics] Error reading {key}: {e}")
            continue

        for entry_bytes in entries:
            raw = entry_bytes.decode() if isinstance(entry_bytes, bytes) else entry_bytes
            parsed = _parse_failed_test_entry(raw, section)
            if not parsed:
                # Unparseable entry (no status marker / timestamp) — skip.
                continue
            if parsed['log_url']:
                if parsed['log_url'] in existing_urls:
                    continue
                existing_urls.add(parsed['log_url'])
            else:
                composite = (parsed['test_cmd'], parsed['timestamp'], parsed['dashboard'])
                if composite in existing_keys:
                    continue
                existing_keys.add(composite)
            try:
                conn.execute('''
                    INSERT INTO test_events
                    (status, test_cmd, log_url, ref_name, commit_author,
                     commit_msg, duration_secs, flake_group_id, dashboard,
                     timestamp)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    parsed['status'], parsed['test_cmd'], parsed['log_url'],
                    parsed['ref_name'], parsed['commit_author'],
                    parsed['commit_msg'], parsed['duration_secs'],
                    parsed['flake_group_id'], parsed['dashboard'],
                    parsed['timestamp'],
                ))
                total += 1
            except Exception as e:
                print(f"[rk_metrics] Error inserting test event: {e}")
    conn.commit()
    if total:
        print(f"[rk_metrics] Synced {total} test events from Redis lists")
+
+
+# ---- Seed loading ----
+
def _load_seed_data():
    """Load CI runs and test events from ci-run-seed.json.gz if SQLite is empty.

    Seeding only runs for tables that are currently empty; populated tables
    are left untouched. A missing seed file is a no-op, and individual bad
    records are skipped (INSERT OR IGNORE plus per-record try/except).
    """
    import gzip
    from pathlib import Path

    conn = db.get_db()
    ci_count = conn.execute('SELECT COUNT(*) as c FROM ci_runs').fetchone()['c']
    te_count = conn.execute('SELECT COUNT(*) as c FROM test_events').fetchone()['c']
    if ci_count > 0 and te_count > 0:
        return

    # Seed file lives next to this module.
    seed = Path(__file__).parent / 'ci-run-seed.json.gz'
    if not seed.exists():
        return

    with gzip.open(seed, 'rt') as f:
        data = json.load(f)

    now_iso = datetime.now(timezone.utc).isoformat()

    if ci_count == 0 and data.get('ci_runs'):
        runs = data['ci_runs']
        for run in runs:
            try:
                conn.execute('''
                    INSERT OR IGNORE INTO ci_runs
                    (dashboard, name, timestamp_ms, complete_ms, status, author,
                     pr_number, instance_type, instance_vcpus, spot, cost_usd,
                     job_id, arch, synced_at)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    run.get('dashboard', ''),
                    run.get('name', ''),
                    run.get('timestamp', 0),
                    run.get('complete'),
                    run.get('status'),
                    run.get('author'),
                    run.get('pr_number'),
                    run.get('instance_type'),
                    run.get('instance_vcpus'),
                    1 if run.get('spot') else 0,
                    run.get('cost_usd'),
                    run.get('job_id', ''),
                    run.get('arch', ''),
                    now_iso,
                ))
            except Exception:
                continue
        conn.commit()
        print(f"[rk_metrics] Loaded {len(runs)} CI runs from seed")

    if te_count == 0 and data.get('test_events'):
        events = data['test_events']
        for ev in events:
            try:
                conn.execute('''
                    INSERT OR IGNORE INTO test_events
                    (status, test_cmd, log_url, ref_name, commit_hash, commit_author,
                     commit_msg, exit_code, duration_secs, is_scenario, owners,
                     flake_group_id, dashboard, timestamp)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    ev.get('status', ''),
                    ev.get('test_cmd', ''),
                    ev.get('log_url'),
                    ev.get('ref_name', ''),
                    ev.get('commit_hash'),
                    ev.get('commit_author'),
                    ev.get('commit_msg'),
                    ev.get('exit_code'),
                    ev.get('duration_secs'),
                    ev.get('is_scenario', 0),
                    ev.get('owners'),
                    ev.get('flake_group_id'),
                    ev.get('dashboard', ''),
                    ev.get('timestamp', ''),
                ))
            except Exception:
                continue
        conn.commit()
        print(f"[rk_metrics] Loaded {len(events)} test events from seed")
+
+
+# ---- CI run sync (Redis → SQLite) for flake correlation ----
+
# TTL gate for sync_ci_runs_to_sqlite: epoch seconds of the last sync and
# the minimum interval between full syncs.
_ci_sync_ts = 0
_CI_SYNC_TTL = 3600  # 1 hour
+
+
def sync_ci_runs_to_sqlite(redis_conn):
    """Sync all CI runs from Redis into SQLite for persistence.

    TTL-gated: no-op unless _CI_SYNC_TTL seconds have passed since the
    previous sync. Uses INSERT OR REPLACE, so re-synced runs overwrite
    their earlier rows; failures on individual runs are logged and skipped.
    """
    global _ci_sync_ts
    now = time.time()
    if now - _ci_sync_ts < _CI_SYNC_TTL:
        return
    _ci_sync_ts = now

    # Sync everything Redis has (not just 30 days)
    runs = _get_ci_runs_from_redis(redis_conn)

    now_iso = datetime.now(timezone.utc).isoformat()
    conn = db.get_db()
    count = 0
    for run in runs:
        try:
            conn.execute('''
                INSERT OR REPLACE INTO ci_runs
                (dashboard, name, timestamp_ms, complete_ms, status, author,
                 pr_number, instance_type, instance_vcpus, spot, cost_usd,
                 job_id, arch, synced_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                run.get('dashboard', ''),
                run.get('name', ''),
                run.get('timestamp', 0),
                run.get('complete'),
                run.get('status'),
                run.get('author'),
                run.get('pr_number'),
                run.get('instance_type'),
                run.get('instance_vcpus'),
                1 if run.get('spot') else 0,
                run.get('cost_usd'),
                run.get('job_id', ''),
                run.get('arch', ''),
                now_iso,
            ))
            count += 1
        except Exception as e:
            print(f"[rk_metrics] Error syncing run: {e}")
    conn.commit()
    print(f"[rk_metrics] Synced {count} CI runs to SQLite")
+
+
def start_ci_run_sync(redis_conn):
    """Start periodic CI run + test event sync thread.

    Loads seed data once up front, then polls every 10 minutes; the sync
    functions are TTL-gated themselves, so most iterations are no-ops.
    Returns the daemon thread handle.
    """
    _load_seed_data()

    def _tick_forever():
        while True:
            try:
                sync_ci_runs_to_sqlite(redis_conn)
                sync_failed_tests_to_sqlite(redis_conn)
            except Exception as e:
                print(f"[rk_metrics] sync error: {e}")
            time.sleep(600)  # check every 10 min (TTL gates actual work)

    t = threading.Thread(target=_tick_forever, daemon=True, name='ci-run-sync')
    t.start()
    return t
+
+
def get_flakes_by_command(date_from, date_to, dashboard=''):
    """Get flake stats grouped by CI command type (dashboard/section).

    Args:
        date_from: inclusive ISO date lower bound ('YYYY-MM-DD').
        date_to: inclusive ISO date upper bound; extended to end of day.
        dashboard: optional single dashboard/section to restrict to.

    Returns:
        {'by_command': [...], 'summary': {...}} where by_command entries are
        sorted by flake count and each carries its top-10 flakiest tests
        plus that dashboard's failure count.
    """
    # The original duplicated both queries verbatim, differing only in the
    # dashboard predicate; build the predicate once so the flake and
    # failure queries can never drift apart. The clause text is fixed
    # (never user input) — the dashboard value itself stays parameterized.
    if dashboard:
        dash_clause = 'dashboard = ?'
        dash_params = (dashboard,)
    else:
        dash_clause = "dashboard != ''"
        dash_params = ()
    date_to_end = date_to + 'T23:59:59'

    rows = db.query(f'''
        SELECT dashboard, test_cmd, COUNT(*) as count
        FROM test_events
        WHERE status = 'flaked' AND {dash_clause}
        AND timestamp >= ? AND timestamp < ?
        GROUP BY dashboard, test_cmd
        ORDER BY count DESC
    ''', (*dash_params, date_from, date_to_end))

    by_command = {}
    total_flakes = 0
    for row in rows:
        cmd = row['dashboard']
        bucket = by_command.setdefault(cmd, {'total': 0, 'tests': {}})
        bucket['total'] += row['count']
        bucket['tests'][row['test_cmd']] = row['count']
        total_flakes += row['count']

    failure_rows = db.query(f'''
        SELECT dashboard, COUNT(*) as count
        FROM test_events
        WHERE status = 'failed' AND {dash_clause}
        AND timestamp >= ? AND timestamp < ?
        GROUP BY dashboard
    ''', (*dash_params, date_from, date_to_end))
    failures_by_command = {r['dashboard']: r['count'] for r in failure_rows}

    result_list = []
    for cmd, data in sorted(by_command.items(), key=lambda x: -x[1]['total']):
        # Keep only the ten flakiest tests per dashboard for display.
        top_tests = sorted(data['tests'].items(), key=lambda x: -x[1])[:10]
        result_list.append({
            'command': cmd,
            'total_flakes': data['total'],
            'total_failures': failures_by_command.get(cmd, 0),
            'top_tests': [{'test_cmd': t, 'count': c} for t, c in top_tests],
        })

    return {
        'by_command': result_list,
        'summary': {
            'total_flakes': total_flakes,
            'total_failures': sum(failures_by_command.values()),
        },
    }
diff --git a/ci3/ci-metrics/requirements.txt b/ci3/ci-metrics/requirements.txt
new file mode 100644
index 000000000000..d6516263133f
--- /dev/null
+++ b/ci3/ci-metrics/requirements.txt
@@ -0,0 +1,8 @@
+flask
+gunicorn
+redis
+Flask-Compress
+Flask-HTTPAuth
+requests
+google-cloud-bigquery
+boto3
diff --git a/ci3/ci-metrics/sync_to_sqlite.py b/ci3/ci-metrics/sync_to_sqlite.py
new file mode 100755
index 000000000000..5dd6faae6172
--- /dev/null
+++ b/ci3/ci-metrics/sync_to_sqlite.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+"""Sync ephemeral Redis CI data to persistent SQLite.
+
+Normally run automatically by the ci-metrics server's background sync thread.
+Can also be run standalone for a one-off manual sync:
+
+ cd ci3/ci-metrics && python3 sync_to_sqlite.py
+
+Connects to Redis, reads all CI runs and failed test lists, writes to SQLite.
+"""
+import os
+import sys
+import time
+
+# Ensure this script can import sibling modules
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+
+import redis as redis_lib
+import db
+import metrics
+
+REDIS_HOST = os.getenv('REDIS_HOST', 'localhost')
+REDIS_PORT = int(os.getenv('REDIS_PORT', '6379'))
+
+
def main():
    """One-off manual sync: Redis CI data -> SQLite, then print row totals."""
    start = time.time()
    # decode_responses=False: the metrics helpers decode bytes themselves.
    r = redis_lib.Redis(host=REDIS_HOST, port=REDIS_PORT, decode_responses=False)

    try:
        r.ping()
    except Exception as e:
        print(f"[sync] Cannot connect to Redis at {REDIS_HOST}:{REDIS_PORT}: {e}")
        sys.exit(1)

    # Ensure DB schema is up to date
    db.get_db()

    # Force sync by resetting the TTL gates
    metrics._ci_sync_ts = 0
    metrics._failed_tests_sync_ts = 0

    # Sync CI runs
    print("[sync] Syncing CI runs from Redis to SQLite...")
    metrics.sync_ci_runs_to_sqlite(r)

    # Sync failed/flaked test events from Redis lists
    print("[sync] Syncing test events from Redis to SQLite...")
    metrics.sync_failed_tests_to_sqlite(r)

    # Report
    conn = db.get_db()
    ci_count = conn.execute('SELECT COUNT(*) as c FROM ci_runs').fetchone()['c']
    te_count = conn.execute('SELECT COUNT(*) as c FROM test_events').fetchone()['c']
    elapsed = time.time() - start
    print(f"[sync] Done in {elapsed:.1f}s. SQLite: {ci_count} CI runs, {te_count} test events.")


if __name__ == '__main__':
    main()
diff --git a/ci3/ci-metrics/views/ci-insights.html b/ci3/ci-metrics/views/ci-insights.html
new file mode 100644
index 000000000000..533b6bfb62cd
--- /dev/null
+++ b/ci3/ci-metrics/views/ci-insights.html
@@ -0,0 +1,658 @@
+
+
+
+
+ ACI - CI Insights
+
+
+
+
+
+ ci insights
+
+
+
+
+
+ |
+
+
+ |
+
+
+
+ |
+
+
+
+
+
+ filtered to pipeline:
+ [clear]
+
+
+
+
+
+
+
+
+
daily ci cost + 7-day rolling cost per merge
+
+
+
+
merge queue: daily outcomes + success rate
+
+
+
+
flakes + test failures per day
+
+
+
+
+
+ flakes by pipeline
+
+
+
+ author ci profile
+
+
+
+
+
+
diff --git a/ci3/ci-metrics/views/cost-overview.html b/ci3/ci-metrics/views/cost-overview.html
new file mode 100644
index 000000000000..53424a2d2d70
--- /dev/null
+++ b/ci3/ci-metrics/views/cost-overview.html
@@ -0,0 +1,905 @@
+
+
+
+
+ ACI - Cost Overview
+
+
+
+
+
+ cost overview
+
+
+
+
+
+ |
+
+
+ |
+
+
+
+ |
+
+
+
+
+
+
+
Overview
+
Resource Details
+
CI Attribution
+
+
+
+
+
+
+
+
combined daily spend
+
+
+
+
service category breakdown
+
+
+
+
+
+
+
+
+
+
+
+
+
+ filtered to user:
+ [clear]
+
+
+
+
+
ci cost by run type (time series)
+
+
+
+
cost by user (AWS + GCP)
+
+
+
+
+
instances
+
+
+
+
+
+
+
diff --git a/ci3/ci-metrics/views/test-timings.html b/ci3/ci-metrics/views/test-timings.html
new file mode 100644
index 000000000000..0bf6c7213bd6
--- /dev/null
+++ b/ci3/ci-metrics/views/test-timings.html
@@ -0,0 +1,289 @@
+
+
+
+
+ ACI - Test Timings
+
+
+
+
+
+ test timings
+
+
+
+
+
+
+ |
+
+
+ |
+
+
+ |
+
+
+
+
+ loading...
+
+
+
+
+
+
avg duration by day
+
+
+
+
test run count by day
+
+
+
+
+ tests by duration
+
+
+ slowest individual runs
+
+
+
+
+
+
diff --git a/ci3/dashboard/Dockerfile b/ci3/dashboard/Dockerfile
index 2ca190fd9753..2da7805ffa83 100644
--- a/ci3/dashboard/Dockerfile
+++ b/ci3/dashboard/Dockerfile
@@ -16,7 +16,12 @@ RUN apt update && apt install -y \
WORKDIR /app
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt gunicorn
+
+# Install ci-metrics dependencies (ci-metrics runs as subprocess)
+COPY ci-metrics/requirements.txt ci-metrics/requirements.txt
+RUN pip install --no-cache-dir -r ci-metrics/requirements.txt
+
RUN git config --global --add safe.directory /aztec-packages
COPY . .
-EXPOSE 8080
+EXPOSE 8080 8081
CMD ["gunicorn", "-w", "100", "-b", "0.0.0.0:8080", "rk:app"]
diff --git a/ci3/dashboard/deploy.sh b/ci3/dashboard/deploy.sh
index cc417006d072..1d9e930e95a1 100755
--- a/ci3/dashboard/deploy.sh
+++ b/ci3/dashboard/deploy.sh
@@ -1,7 +1,13 @@
#!/bin/bash
set -euo pipefail
-rsync -avz --exclude='deploy.sh' -e "ssh -i ~/.ssh/build_instance_key" * ubuntu@ci.aztec-labs.com:rk
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Sync dashboard (rkapp) files
+rsync -avz --exclude='deploy.sh' -e "ssh -i ~/.ssh/build_instance_key" "$SCRIPT_DIR"/* ubuntu@ci.aztec-labs.com:rk
+
+# Sync ci-metrics server (started as subprocess by rkapp)
+rsync -avz -e "ssh -i ~/.ssh/build_instance_key" "$SCRIPT_DIR/../ci-metrics/" ubuntu@ci.aztec-labs.com:rk/ci-metrics/
ssh -i ~/.ssh/build_instance_key ubuntu@ci.aztec-labs.com "
cd rk
diff --git a/ci3/dashboard/rk.py b/ci3/dashboard/rk.py
index 4e194cbc3a10..aedf35a824e2 100644
--- a/ci3/dashboard/rk.py
+++ b/ci3/dashboard/rk.py
@@ -18,13 +18,40 @@
YELLOW, BLUE, GREEN, RED, PURPLE, BOLD, RESET,
hyperlink, r, get_section_data, get_list_as_string
)
-
LOGS_DISK_PATH = os.getenv('LOGS_DISK_PATH', '/logs-disk')
DASHBOARD_PASSWORD = os.getenv('DASHBOARD_PASSWORD', 'password')
+CI_METRICS_PORT = int(os.getenv('CI_METRICS_PORT', '8081'))
+CI_METRICS_URL = os.getenv('CI_METRICS_URL', f'http://localhost:{CI_METRICS_PORT}')
+
app = Flask(__name__)
Compress(app)
auth = HTTPBasicAuth()
+# Start the ci-metrics server as a subprocess
+# Check sibling dir (repo layout) then subdirectory (Docker layout)
+_ci_metrics_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'ci-metrics')
+if not os.path.isdir(_ci_metrics_dir):
+ _ci_metrics_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'ci-metrics')
+if os.path.isdir(_ci_metrics_dir):
+ # Kill any stale process on the port (e.g. leftover from previous reload)
+ import signal
+ try:
+ out = subprocess.check_output(
+ ['lsof', '-ti', f':{CI_METRICS_PORT}'], stderr=subprocess.DEVNULL, text=True)
+ for pid in out.strip().split('\n'):
+ if pid:
+ os.kill(int(pid), signal.SIGTERM)
+ import time; time.sleep(0.5)
+ except (subprocess.CalledProcessError, OSError):
+ pass
+ _ci_metrics_env = {**os.environ, 'CI_METRICS_PORT': str(CI_METRICS_PORT)}
+ subprocess.Popen(
+ ['gunicorn', '-w', '4', '-b', f'0.0.0.0:{CI_METRICS_PORT}', '--timeout', '120', 'app:app'],
+ cwd=_ci_metrics_dir,
+ env=_ci_metrics_env,
+ )
+ print(f"[rk.py] ci-metrics server started on port {CI_METRICS_PORT}")
+
def read_from_disk(key):
"""Read log from disk as fallback when Redis key not found."""
try:
@@ -145,6 +172,14 @@ def root() -> str:
f"{hyperlink('https://aztecprotocol.github.io/benchmark-page-data/bench?branch=next', 'next')}\n"
f"{hyperlink('/chonk-breakdowns', 'chonk breakdowns')}\n"
f"{RESET}"
+ f"\n"
+ f"CI Metrics:\n"
+ f"\n{YELLOW}"
+ f"{hyperlink('/cost-overview', 'cost overview (AWS + GCP)')}\n"
+ f"{hyperlink('/namespace-billing', 'namespace billing')}\n"
+ f"{hyperlink('/ci-insights', 'ci insights')}\n"
+ f"{hyperlink('/test-timings', 'test timings')}\n"
+ f"{RESET}"
)
def section_view(section: str) -> str:
@@ -487,6 +522,57 @@ def make_options(param_name, options, current_value, suffix=''):
# Redirect to log view.
return redirect(f'/{run_id}')
+
+# ---- Reverse proxy to ci-metrics server ----
+
+_proxy_session = requests.Session()
+_HOP_BY_HOP = frozenset([
+ 'connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization',
+ 'te', 'trailers', 'transfer-encoding', 'upgrade', 'content-length',
+ # `requests` auto-decompresses gzip responses, so Content-Encoding is
+ # stale — strip it so the browser doesn't try to decompress plain content.
+ # Flask-Compress on rkapp handles browser compression.
+ 'content-encoding',
+])
+# Don't forward Accept-Encoding — let `requests` negotiate with ci-metrics
+# (it adds its own and auto-decompresses).
+_STRIP_REQUEST_HEADERS = frozenset(['host', 'accept-encoding'])
+
def _proxy(path):
    """Forward request to ci-metrics, streaming the response back.

    Query string and body are passed through. Host and Accept-Encoding are
    dropped from the forwarded request headers (`requests` negotiates its
    own encoding and auto-decompresses). Hop-by-hop headers — plus
    Content-Length/Content-Encoding, which are stale after decompression —
    are stripped from the response. Any failure to reach ci-metrics yields
    a 502 JSON error.
    """
    url = f'{CI_METRICS_URL}/{path.lstrip("/")}'
    try:
        resp = _proxy_session.request(
            method=request.method,
            url=url,
            params=request.args,
            data=request.get_data(),
            headers={k: v for k, v in request.headers if k.lower() not in _STRIP_REQUEST_HEADERS},
            stream=True,
            timeout=60,
        )
        # Strip hop-by-hop headers
        headers = {k: v for k, v in resp.headers.items() if k.lower() not in _HOP_BY_HOP}
        return Response(resp.iter_content(chunk_size=8192),
                        status=resp.status_code, headers=headers)
    except Exception as e:
        return Response(json.dumps({'error': f'ci-metrics unavailable: {e}'}),
                        mimetype='application/json', status=502)
+
+@app.route('/namespace-billing')
+@app.route('/ci-health')
+@app.route('/ci-insights')
+@app.route('/cost-overview')
+@app.route('/test-timings')
+@auth.login_required
+def proxy_dashboard():
+ return _proxy(request.path)
+
+@app.route('/api/', methods=['GET', 'POST', 'PUT', 'DELETE'])
+@auth.login_required
+def proxy_api(path):
+ return _proxy(f'/api/{path}')
+
@app.route('/')
@auth.login_required
def get_value(key):
diff --git a/ci3/log_ci_run b/ci3/log_ci_run
index 5c9567ae91dd..b52b93256edc 100755
--- a/ci3/log_ci_run
+++ b/ci3/log_ci_run
@@ -35,6 +35,14 @@ if [ -z "$key" ]; then
author="$(git log -1 --pretty=format:"%an")"
name=$REF_NAME
[ "$(aws_get_meta_data instance-life-cycle)" == "spot" ] && spot=true || spot=false
+ instance_type=$(aws_get_meta_data instance-type 2>/dev/null || echo "unknown")
+ instance_vcpus=$(nproc 2>/dev/null || echo 0)
+
+ # Extract PR number from branch name or merge queue ref
+ pr_number=""
+ if [[ "$REF_NAME" =~ [Pp][Rr]-?([0-9]+) ]]; then
+ pr_number="${BASH_REMATCH[1]}"
+ fi
# If this is github merge queue, just keep the queue name.
if [[ "$name" =~ ^gh-readonly-queue/([^/]+)/ ]]; then
@@ -42,6 +50,7 @@ if [ -z "$key" ]; then
fi
msg=$(pr_link "$msg")
+ dashboard="${range_key#ci-run-}"
json=$(jq -c -j -n \
--argjson timestamp "$key" \
@@ -53,7 +62,12 @@ if [ -z "$key" ]; then
--arg author "$author" \
--arg arch "$(arch)" \
--argjson spot "$spot" \
- '{timestamp: $timestamp, run_id: $run_id, job_id: $job_id, status: $status, msg: $msg, name: $name, author: $author, arch: $arch, spot: $spot}')
+ --arg instance_type "$instance_type" \
+ --argjson instance_vcpus "$instance_vcpus" \
+ --arg pr_number "$pr_number" \
+ --arg dashboard "$dashboard" \
+ --arg github_actor "${GITHUB_ACTOR:-}" \
+ '{timestamp: $timestamp, run_id: $run_id, job_id: $job_id, status: $status, msg: $msg, name: $name, author: $author, github_actor: $github_actor, arch: $arch, spot: $spot, instance_type: $instance_type, instance_vcpus: $instance_vcpus, pr_number: $pr_number, dashboard: $dashboard}')
# echo "$json" >&2
redis_cli ZADD $range_key $key "$json" &>/dev/null
redis_cli SETEX hb-$key 60 1 &>/dev/null
diff --git a/ci3/run_test_cmd b/ci3/run_test_cmd
index 66334e535f27..35c37c4d0c42 100755
--- a/ci3/run_test_cmd
+++ b/ci3/run_test_cmd
@@ -160,7 +160,8 @@ if [ "$publish" -eq 1 ]; then
--arg commit_hash "$COMMIT_HASH" \
--arg commit_author "$COMMIT_AUTHOR" \
--arg commit_msg "$COMMIT_MSG" \
- '{status: $status, test_cmd: $test_cmd, log_id: $log_id, log_url: $log_url, ref_name: $ref_name, commit_hash: $commit_hash, commit_author: $commit_author, commit_msg: $commit_msg, timestamp: now | todate}')
+ --arg dashboard "${CI_DASHBOARD:-}" \
+ '{status: $status, test_cmd: $test_cmd, log_id: $log_id, log_url: $log_url, ref_name: $ref_name, commit_hash: $commit_hash, commit_author: $commit_author, commit_msg: $commit_msg, dashboard: $dashboard, timestamp: now | todate}')
redis_publish "ci:test:started" "$start_redis_data"
fi
@@ -228,15 +229,16 @@ function track_test_failed {
function publish_redis {
local redis_data=$(jq -n \
--arg status "$1" \
- --arg cmd "$cmd" \
- --arg log_key "$log_key" \
- --arg ref_name "$REF_NAME" \
+ --arg test_cmd "$cmd" \
+ --arg log_url "http://ci.aztec-labs.com/$log_key" \
+ --arg ref_name "${TARGET_BRANCH:-$REF_NAME}" \
--arg commit_hash "$COMMIT_HASH" \
--arg commit_author "$COMMIT_AUTHOR" \
--arg commit_msg "$COMMIT_MSG" \
--argjson code "$code" \
--argjson duration "$SECONDS" \
- '{status: $status, cmd: $cmd, log_key: $log_key, ref_name: $ref_name, commit_hash: $commit_hash, commit_author: $commit_author, commit_msg: $commit_msg, exit_code: $code, duration_seconds: $duration, timestamp: now | todate}')
+ --arg dashboard "${CI_DASHBOARD:-}" \
+ '{status: $status, test_cmd: $test_cmd, log_url: $log_url, ref_name: $ref_name, commit_hash: $commit_hash, commit_author: $commit_author, commit_msg: $commit_msg, exit_code: $code, duration_seconds: $duration, dashboard: $dashboard, timestamp: now | todate}')
redis_publish "ci:test:$1" "$redis_data"
}
@@ -247,6 +249,8 @@ function pass {
local line="${green}PASSED${reset}${log_info:-}: $test_cmd (${SECONDS}s)"
echo -e "$line"
+ [ "$publish" -eq 1 ] && publish_redis "passed"
+
if [ "$track_test_history" -eq 1 ]; then
local track_line="${green}PASSED${reset}${log_info:-} ${fail_links}: $test_cmd (${SECONDS}s) (${purple}$COMMIT_AUTHOR${reset}: $COMMIT_MSG)"
track_test_history "$track_line"