From 6126b70be02515d185d3ac90d03b7de7ab536527 Mon Sep 17 00:00:00 2001 From: Pahuldeep Singh Date: Sun, 29 Mar 2026 14:27:21 -0500 Subject: [PATCH 01/19] feat(staging): scaffold pre-prod environment and harden local/runtime flows --- README.md | 79 ++++++----- .../src/features/settings/SettingsPage.tsx | 14 +- .../src/routes/__tests__/account.test.ts | 23 +++- apps/gateway/src/routes/__tests__/sso.test.ts | 12 ++ apps/gateway/src/routes/account.ts | 95 ++++++++----- apps/gateway/src/routes/sso.ts | 16 ++- apps/gateway/src/services/device.ts | 19 ++- infra/docker/docker-compose.yml | 21 +-- infra/terraform/environments/dev/main.tf | 2 +- .../terraform/environments/staging/.gitignore | 1 + .../environments/staging/.terraform.lock.hcl | 45 +++++++ infra/terraform/environments/staging/main.tf | 79 +++++++++++ .../terraform/environments/staging/outputs.tf | 17 +++ .../environments/staging/providers.tf | 25 ++++ .../environments/staging/variables.tf | 14 ++ k8s/argocd/apps/grainguard-staging.yaml | 36 +++++ k8s/argocd/project.yaml | 7 + k8s/helm/grainguard/values-dev.yaml | 2 + k8s/helm/grainguard/values-prod.yaml | 2 + k8s/helm/grainguard/values-staging.yaml | 87 ++++++++++++ scripts/load-tests/graphql-stress.js | 59 ++++++++ scripts/load-tests/ingest-stress.js | 79 +++++++++++ scripts/load-tests/mixed-stack-stress.js | 127 ++++++++++++++++++ 23 files changed, 761 insertions(+), 100 deletions(-) create mode 100644 infra/terraform/environments/staging/.gitignore create mode 100644 infra/terraform/environments/staging/.terraform.lock.hcl create mode 100644 infra/terraform/environments/staging/main.tf create mode 100644 infra/terraform/environments/staging/outputs.tf create mode 100644 infra/terraform/environments/staging/providers.tf create mode 100644 infra/terraform/environments/staging/variables.tf create mode 100644 k8s/argocd/apps/grainguard-staging.yaml create mode 100644 k8s/helm/grainguard/values-staging.yaml create mode 100644 scripts/load-tests/graphql-stress.js create mode 100644 scripts/load-tests/ingest-stress.js create mode 100644 scripts/load-tests/mixed-stack-stress.js diff --git a/README.md b/README.md index aa9556f..4c4fb08 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ > Production-grade, polyglot microservices SaaS platform for grain and agri operations. -GrainGuard ingests high-volume device telemetry, computes spoilage risk scores, triggers automated alert workflows, and ships with full multi-tenant billing, SSO, team management, audit logging, observability, CI/CD, chaos testing, SLO monitoring, and operational runbooks. +GrainGuard ingests high-volume device telemetry, computes spoilage risk scores, triggers automated alert workflows, and ships with multi-tenant billing, SSO, team management, audit logging, observability, CI/CD, load testing, and operational runbooks. --- @@ -81,6 +81,18 @@ Risk Engine (Python) ── Workflow Alerts (Node.js) ── RabbitMQ ── Job --- +## Current Deployment Status + +| Area | State | +|------|-------| +| Local Docker stack | ✅ Validated end-to-end | +| GitOps apps in repo | ✅ `dev`, `staging`, and `prod` ArgoCD apps committed | +| Terraform environments in repo | ✅ `dev` and `staging` committed | +| Dedicated staging environment | 🟡 Scaffold committed; deploy/validate next | +| Production rollout strategy | 🟡 Safe rolling deploys now; canary planned for production | + +--- + ## SaaS Features | Feature | Status | @@ -172,18 +184,22 @@ go run tools/publish-telemetry/main.go # Go unit + integration tests go test -race -count=1 ./... +# Go lint +go run github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.11.0 run --timeout=5m + # k6 load tests (requires running stack) k6 run tests/load/spike.js k6 run tests/load/soak.js k6 run tests/load/stress.js -# Chaos tests (requires kubectl + live cluster) -bash tests/chaos/run-all.sh - # Replay + idempotency test ./scripts/replay/replay_test.sh ``` +Note: +- The core load-test scripts above are committed in `tests/load/`. +- Cluster-level chaos automation is not currently committed on `master`; add or restore it before relying on README-driven chaos drills. + ## Code Review Automation This repository is preconfigured for CodeRabbit via [`/.coderabbit.yaml`](./.coderabbit.yaml). @@ -215,35 +231,15 @@ Notes: --- -## Chaos Testing - -Five experiments covering the critical failure modes: - -| Experiment | What it kills | Pass condition | -|------------|--------------|----------------| -| `pod-kill` | gateway, bff, telemetry-service pods | Respawns within 30s | -| `kafka-consumer-pause` | read-model-builder + cdc-transformer | Lag ≤ 10 000 within 5 min | -| `redis-outage` | Redis | BFF falls back to DB, no panics | -| `projection-lag` | read-model-builder | Alert fires, lag recovers in 5 min | -| `network-partition` | telemetry-service → Kafka egress | Messages buffered, delivered after heal | - -```bash -# Run all experiments -bash tests/chaos/run-all.sh - -# Or trigger via GitHub Actions (manual dispatch) -# .github/workflows/chaos.yml — also runs weekly on Saturdays -``` - ---- - ## Operational Runbooks | Runbook | Trigger | |---------|---------| +| [Postgres Backup / Restore](docs/runbooks/postgres-backup-restore.md) | Backup verification, restore drill, data recovery | | [Postgres Failover](docs/runbooks/postgres-failover.md) | Primary down, replica lag high | | [Kafka Loss](docs/runbooks/kafka-loss.md) | Broker down, under-replicated partitions | | [DLQ Spike](docs/runbooks/dlq-spike.md) | `DLQMessagesAccumulating` alert | +| [Redis Backup / Restore](docs/runbooks/redis-backup-restore.md) | Cache restore drill, persistence recovery | | [Redis Failover](docs/runbooks/redis-failover.md) | Cache miss 100%, lock timeouts | | [Projection Lag](docs/runbooks/projection-lag.md) | `ProjectionLagHigh` alert | | [gRPC Outage](docs/runbooks/grpc-outage.md) | Circuit breaker open, 503 upstream | @@ -261,6 +257,8 @@ terraform apply -var="db_password=yourpassword" Provisions: VPC · EKS · RDS Postgres · Elasticache Redis · MSK Kafka · DynamoDB · ECR · Secrets Manager +Today, `dev` and `staging` Terraform environments are committed in-repo. The next step is to deploy and validate `staging` before treating the rollout path as production-ready. + --- ## Kubernetes (GitOps) @@ -278,6 +276,14 @@ helm diff upgrade grainguard k8s/helm/grainguard \ ArgoCD watches `k8s/argocd/apps/` and auto-syncs on every push to master. +Committed applications today: +- `grainguard-dev` -> `grainguard-dev` +- `grainguard-staging` -> `grainguard-staging` +- `grainguard-prod` -> `grainguard-prod` + +Recommended next environment: +- `grainguard-staging` -> deploy and validate ingress, TLS, DNS, secrets, restore drills, and production-like auth/billing flows before first prod rollout + --- ## Architecture Decision Records @@ -303,21 +309,26 @@ ArgoCD watches `k8s/argocd/apps/` and auto-syncs on every push to master. |-------|------|--------| | R1 — Core loop | Ingest, CQRS, outbox, saga | ✅ Done | | R2 — CDC + Search | Debezium, Elasticsearch, RabbitMQ | ✅ Done | -| R3 — Reliability | Helm, ArgoCD, k6 load tests, chaos tests | ✅ Done | +| R3 — Reliability baseline | Helm, ArgoCD scaffolding, k6 load tests, runbooks | ✅ Done | | R4 — Observability | SLOs, burn-rate alerts, Grafana dashboard, runbooks | ✅ Done | | R5 — Security | CSRF, rate limiting, audit logging, RBAC, API keys | ✅ Done | | R6 — SaaS billing | Stripe, tenant onboarding, team management, SSO, webhooks | ✅ Done | -| R7 — DB migrations | Flyway/Knex migration framework, schema versioning | 🔜 Next | -| R8 — Secret management | HashiCorp Vault / AWS Secrets Manager integration | 🔜 Planned | +| R7 — Staging environment | Dedicated Argo app, Terraform env, deployed validation | 🟡 Scaffolded | +| R8 — Production hardening | Canary rollout, restore proof, deployed auth/webhook validation | 🔜 Next | --- -## Load test results +## Latest Local Validation + +Latest mixed read/write validation on `master` (local Docker stack): -- Kafka ingest: **1,700 events/sec** -- Gateway p95 latency: **5.89ms** -- Read model builder: **2,500–3,000 events/sec** sustained +- **35,077** total requests +- **438 req/s** aggregate throughput +- **0%** HTTP failure rate +- Gateway GraphQL p95: **11.5 ms** +- Ingest p95: **10.8 ms** +- Kafka consumer groups drained back to **0 lag** after the run --- -*Built to demonstrate end-to-end DDIA patterns, distributed systems, GitOps, SRE practices, and production multi-tenant SaaS architecture.* +*Built to demonstrate end-to-end DDIA patterns, distributed systems, GitOps, SRE practices, and production-style multi-tenant SaaS architecture.* diff --git a/apps/dashboard/src/features/settings/SettingsPage.tsx b/apps/dashboard/src/features/settings/SettingsPage.tsx index 462c341..cfeb216 100644 --- a/apps/dashboard/src/features/settings/SettingsPage.tsx +++ b/apps/dashboard/src/features/settings/SettingsPage.tsx @@ -103,16 +103,24 @@ export function SettingsPage() { const res = await fetch(`${GW}/account/export`, { headers: { Authorization: `Bearer ${token}` }, }); + if (!res.ok) { + const body = await res.json().catch(() => ({})); + throw new Error( + typeof body?.error === "string" ? body.error : `HTTP ${res.status}` + ); + } const blob = await res.blob(); const url = URL.createObjectURL(blob); const a = document.createElement("a"); a.href = url; - a.download = `grainguard-export-${new Date().toISOString().slice(0, 10)}.json`; + const disposition = res.headers.get("content-disposition") ?? ""; + const filenameMatch = disposition.match(/filename=\"?([^"]+)\"?/i); + a.download = filenameMatch?.[1] ?? `grainguard-export-${new Date().toISOString().slice(0, 10)}.json`; a.click(); URL.revokeObjectURL(url); toast.success("Data exported"); - } catch { - toast.error("Export failed"); + } catch (e) { + toast.error(e instanceof Error ? e.message : "Export failed"); } } diff --git a/apps/gateway/src/routes/__tests__/account.test.ts b/apps/gateway/src/routes/__tests__/account.test.ts index 41facac..e3fd306 100644 --- a/apps/gateway/src/routes/__tests__/account.test.ts +++ b/apps/gateway/src/routes/__tests__/account.test.ts @@ -75,12 +75,9 @@ describe("DELETE /account/me", () => { .mockResolvedValueOnce(undefined as any) // BEGIN .mockResolvedValueOnce({ rows: [{ id: "a1" }] } as any) // only admin .mockResolvedValueOnce({ rows: [{ id: "u1", role: "admin" }] } as any) // user - .mockResolvedValueOnce(undefined as any) // DELETE invites - .mockResolvedValueOnce(undefined as any) // DELETE api_keys - .mockResolvedValueOnce(undefined as any) // DELETE alert_rules - .mockResolvedValueOnce(undefined as any) // DELETE audit_events + .mockResolvedValueOnce(undefined as any) // DELETE telemetry_readings .mockResolvedValueOnce(undefined as any) // DELETE devices - .mockResolvedValueOnce(undefined as any) // DELETE tenant_users + .mockResolvedValueOnce({ rows: [{ count: 0 }] } as any) // COUNT audit_events .mockResolvedValueOnce(undefined as any) // DELETE tenants .mockResolvedValueOnce(undefined as any); // COMMIT @@ -88,6 +85,22 @@ describe("DELETE /account/me", () => { expect(res.status).toBe(200); expect(res.body.scope).toBe("tenant"); }); + + it("reports retained immutable audit events when present", async () => { + mockPool.query + .mockResolvedValueOnce(undefined as any) // BEGIN + .mockResolvedValueOnce({ rows: [{ id: "a1" }] } as any) // only admin + .mockResolvedValueOnce({ rows: [{ id: "u1", role: "admin" }] } as any) // user + .mockResolvedValueOnce(undefined as any) // DELETE telemetry_readings + .mockResolvedValueOnce(undefined as any) // DELETE devices + .mockResolvedValueOnce({ rows: [{ count: 3 }] } as any) // COUNT audit_events + .mockResolvedValueOnce(undefined as any) // DELETE tenants + .mockResolvedValueOnce(undefined as any); // COMMIT + + const res = await request(app).delete("/account/me"); + expect(res.status).toBe(200); + expect(res.body.message).toContain("Immutable audit events"); + }); }); describe("GET /account/export", () => { diff --git a/apps/gateway/src/routes/__tests__/sso.test.ts b/apps/gateway/src/routes/__tests__/sso.test.ts index ce3b971..a2a69c4 100644 --- a/apps/gateway/src/routes/__tests__/sso.test.ts +++ b/apps/gateway/src/routes/__tests__/sso.test.ts @@ -22,12 +22,14 @@ jest.mock("../../lib/auth0Management", () => ({ import { ssoRouter } from "../sso"; import { writePool as pool } from "../../database/db"; +import { listOrgConnections } from "../../lib/auth0Management"; const app = express(); app.use(express.json()); app.use(ssoRouter); const mockPool = pool as unknown as { query: jest.Mock }; +const mockListOrgConnections = listOrgConnections as jest.Mock; describe("GET /tenants/me/sso", () => { it("returns unconfigured state when no org exists", async () => { @@ -44,6 +46,16 @@ describe("GET /tenants/me/sso", () => { expect(res.body.configured).toBe(true); expect(res.body.connections).toHaveLength(1); }); + + it("returns a soft warning when Auth0 management is unavailable", async () => { + mockPool.query.mockResolvedValue({ rows: [{ auth0_org_id: "org-123" }] } as any); + mockListOrgConnections.mockRejectedValueOnce(new Error("SSO not configured")); + const res = await request(app).get("/tenants/me/sso"); + expect(res.status).toBe(200); + expect(res.body.configured).toBe(true); + expect(res.body.connections).toEqual([]); + expect(res.body.warning).toContain("Auth0 management API"); + }); }); describe("POST /tenants/me/sso/org", () => { diff --git a/apps/gateway/src/routes/account.ts b/apps/gateway/src/routes/account.ts index c2de51f..df24e19 100644 --- a/apps/gateway/src/routes/account.ts +++ b/apps/gateway/src/routes/account.ts @@ -76,17 +76,37 @@ accountRouter.delete( admins.length === 1; if (isLastAdmin) { - // Delete the entire tenant and all associated data - await client.query("DELETE FROM tenant_invites WHERE tenant_id = $1", [tenantId]); - await client.query("DELETE FROM api_keys WHERE tenant_id = $1", [tenantId]); - await client.query("DELETE FROM alert_rules WHERE tenant_id = $1", [tenantId]); - await client.query("DELETE FROM audit_events WHERE tenant_id = $1", [tenantId]); + // Delete tenant-owned device data first because telemetry_readings + // references devices without ON DELETE CASCADE. + await client.query( + `DELETE FROM telemetry_readings tr + USING devices d + WHERE tr.device_id = d.id + AND d.tenant_id = $1`, + [tenantId] + ); await client.query("DELETE FROM devices WHERE tenant_id = $1", [tenantId]); - await client.query("DELETE FROM tenant_users WHERE tenant_id = $1", [tenantId]); + + const { rows: auditEventRows } = await client.query( + "SELECT COUNT(*)::int AS count FROM audit_events WHERE tenant_id = $1", + [tenantId] + ); + + // Most tenant-linked tables cascade from tenants, so deleting the + // tenant removes them automatically. Immutable audit_events are + // intentionally retained for compliance and cannot be deleted. await client.query("DELETE FROM tenants WHERE id = $1", [tenantId]); await client.query("COMMIT"); - return res.json({ deleted: true, scope: "tenant", message: "Tenant and all data deleted" }); + const immutableAuditEvents = auditEventRows[0]?.count ?? 0; + return res.json({ + deleted: true, + scope: "tenant", + message: + immutableAuditEvents > 0 + ? "Tenant deleted. Immutable audit events were retained for compliance." + : "Tenant and all mutable data deleted", + }); } // Just remove this user from the tenant @@ -113,33 +133,38 @@ accountRouter.get( "/account/export", authMiddleware, async (req: Request, res: Response) => { - const tenantId = req.user!.tenantId; - - const [tenantResult, usersResult, devicesResult, alertsResult, auditResult, keysResult] = - await Promise.all([ - pool.query("SELECT id, name, slug, plan, email, created_at FROM tenants WHERE id = $1", [tenantId]), - pool.query("SELECT id, email, role, created_at FROM tenant_users WHERE tenant_id = $1", [tenantId]), - pool.query("SELECT id, serial_number, created_at FROM devices WHERE tenant_id = $1", [tenantId]), - pool.query("SELECT id, name, metric, operator, threshold, enabled, created_at FROM alert_rules WHERE tenant_id = $1", [tenantId]), - pool.query("SELECT id, event_type, actor_id, resource_type, payload, created_at FROM audit_events WHERE tenant_id = $1 ORDER BY created_at DESC LIMIT 1000", [tenantId]), - pool.query("SELECT id, name, created_at, expires_at, revoked_at FROM api_keys WHERE tenant_id = $1", [tenantId]), - ]); - - const exportData = { - exportedAt: new Date().toISOString(), - tenant: tenantResult.rows[0] || null, - users: usersResult.rows, - devices: devicesResult.rows, - alertRules: alertsResult.rows, - auditEvents: auditResult.rows, - apiKeys: keysResult.rows, - }; - - res.setHeader("Content-Type", "application/json"); - res.setHeader( - "Content-Disposition", - `attachment; filename="grainguard-export-${tenantId}-${new Date().toISOString().slice(0, 10)}.json"` - ); - return res.json(exportData); + try { + const tenantId = req.user!.tenantId; + + const [tenantResult, usersResult, devicesResult, alertsResult, auditResult, keysResult] = + await Promise.all([ + pool.query("SELECT id, name, slug, plan, email, created_at FROM tenants WHERE id = $1", [tenantId]), + pool.query("SELECT id, email, role, created_at FROM tenant_users WHERE tenant_id = $1", [tenantId]), + pool.query("SELECT id, serial_number, created_at FROM devices WHERE tenant_id = $1", [tenantId]), + pool.query("SELECT id, name, metric, operator, threshold, enabled, created_at FROM alert_rules WHERE tenant_id = $1", [tenantId]), + pool.query("SELECT id, event_type, actor_id, resource_type, payload, created_at FROM audit_events WHERE tenant_id = $1 ORDER BY created_at DESC LIMIT 1000", [tenantId]), + pool.query("SELECT id, name, created_at, expires_at, revoked_at FROM api_keys WHERE tenant_id = $1", [tenantId]), + ]); + + const exportData = { + exportedAt: new Date().toISOString(), + tenant: tenantResult.rows[0] || null, + users: usersResult.rows, + devices: devicesResult.rows, + alertRules: alertsResult.rows, + auditEvents: auditResult.rows, + apiKeys: keysResult.rows, + }; + + res.setHeader("Content-Type", "application/json"); + res.setHeader( + "Content-Disposition", + `attachment; filename="grainguard-export-${tenantId}-${new Date().toISOString().slice(0, 10)}.json"` + ); + return res.json(exportData); + } catch (err) { + console.error("[account] export error:", err); + return res.status(500).json({ error: "internal_error" }); + } } ); diff --git a/apps/gateway/src/routes/sso.ts b/apps/gateway/src/routes/sso.ts index 9eb830d..39734a4 100644 --- a/apps/gateway/src/routes/sso.ts +++ b/apps/gateway/src/routes/sso.ts @@ -43,9 +43,19 @@ ssoRouter.get( } const orgId = rows[0].auth0_org_id; - const connections = await listOrgConnections(orgId); - - return res.json({ configured: true, orgId, connections }); + try { + const connections = await listOrgConnections(orgId); + + return res.json({ configured: true, orgId, connections }); + } catch (error) { + console.error("[sso] failed to list org connections:", error); + return res.json({ + configured: true, + orgId, + connections: [], + warning: "Auth0 management API is unavailable for the gateway right now.", + }); + } } ); diff --git a/apps/gateway/src/services/device.ts b/apps/gateway/src/services/device.ts index 32248e5..1063392 100644 --- a/apps/gateway/src/services/device.ts +++ b/apps/gateway/src/services/device.ts @@ -8,10 +8,21 @@ import fs from "fs"; 📦 Load Proto ========================================= */ -const protoPath = path.resolve( - __dirname, - "../../libs/proto/device.proto" -); +const protoCandidates = [ + path.resolve(process.cwd(), "libs/proto/device.proto"), + path.resolve(__dirname, "../../libs/proto/device.proto"), + path.resolve(__dirname, "../../../libs/proto/device.proto"), + "/app/libs/proto/device.proto", + "/libs/proto/device.proto", +]; + +const protoPath = protoCandidates.find((candidate) => fs.existsSync(candidate)); + +if (!protoPath) { + throw new Error( + `device.proto not found; checked: ${protoCandidates.join(", ")}` + ); +} const packageDefinition = protoLoader.loadSync(protoPath, { keepCase: true, diff --git a/infra/docker/docker-compose.yml b/infra/docker/docker-compose.yml index 2be16a9..e31013c 100644 --- a/infra/docker/docker-compose.yml +++ b/infra/docker/docker-compose.yml @@ -557,6 +557,8 @@ services: dockerfile: apps/gateway/Dockerfile container_name: grainguard-gateway restart: unless-stopped + env_file: + - ../../.env volumes: - ../certs:/certs environment: @@ -578,17 +580,6 @@ services: WRITE_DB_NAME: grainguard WRITE_DB_USER: postgres WRITE_DB_PASSWORD: postgres - # Stripe — override via root .env - STRIPE_SECRET_KEY: "${STRIPE_SECRET_KEY:-sk_test_placeholder}" - STRIPE_WEBHOOK_SECRET: "${STRIPE_WEBHOOK_SECRET:-whsec_placeholder}" - STRIPE_PRICE_STARTER: "${STRIPE_PRICE_STARTER:-price_starter_placeholder}" - STRIPE_PRICE_PROFESSIONAL: "${STRIPE_PRICE_PROFESSIONAL:-price_pro_placeholder}" - STRIPE_PRICE_ENTERPRISE: "${STRIPE_PRICE_ENTERPRISE:-price_enterprise_placeholder}" - # Auth0 M2M — override via root .env - AUTH0_DOMAIN: "${AUTH0_DOMAIN:-dev-dz6bl3nngdeib7ro.us.auth0.com}" - AUTH0_MANAGEMENT_CLIENT_ID: "${AUTH0_MANAGEMENT_CLIENT_ID:-}" - AUTH0_MANAGEMENT_CLIENT_SECRET: "${AUTH0_MANAGEMENT_CLIENT_SECRET:-}" - AUTH0_M2M_AUDIENCE: "${AUTH0_MANAGEMENT_AUDIENCE:-https://dev-dz6bl3nngdeib7ro.us.auth0.com/api/v2/}" depends_on: redis: condition: service_started @@ -620,11 +611,12 @@ services: container_name: grainguard-grafana ports: - "3000:3000" + env_file: + - ../../.env environment: - GF_SECURITY_ADMIN_PASSWORD=admin - GF_USERS_ALLOW_SIGN_UP=false - GF_UNIFIED_ALERTING_ENABLED=true - - SLACK_WEBHOOK_URL=${SLACK_WEBHOOK_URL:-https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK} volumes: - grafana-data:/var/lib/grafana - ./grafana/provisioning:/etc/grafana/provisioning @@ -764,12 +756,11 @@ services: dockerfile: apps/jobs-worker/Dockerfile container_name: grainguard-jobs-worker restart: unless-stopped + env_file: + - ../../.env environment: RABBITMQ_URL: amqp://grainguard:grainguard@rabbitmq:5672/grainguard DATABASE_URL: postgres://postgres:postgres@pgbouncer-write:5432/grainguard?sslmode=disable - # Set RESEND_API_KEY and EMAIL_FROM via root .env to enable real email delivery - RESEND_API_KEY: "${RESEND_API_KEY:-}" - EMAIL_FROM: "${EMAIL_FROM:-GrainGuard }" depends_on: rabbitmq: condition: service_healthy diff --git a/infra/terraform/environments/dev/main.tf b/infra/terraform/environments/dev/main.tf index e3ce892..489e678 100644 --- a/infra/terraform/environments/dev/main.tf +++ b/infra/terraform/environments/dev/main.tf @@ -65,7 +65,7 @@ module "iam_irsa" { environment = "dev" oidc_issuer_url = module.eks.oidc_issuer_url oidc_provider_arn = module.eks.oidc_provider_arn - k8s_namespace = "grainguard" + k8s_namespace = "grainguard-dev" secrets_read_policy_arn = module.secrets_manager.secrets_read_policy_arn dynamodb_table_arns = module.dynamodb.all_table_arns } diff --git a/infra/terraform/environments/staging/.gitignore b/infra/terraform/environments/staging/.gitignore new file mode 100644 index 0000000..1c99dc1 --- /dev/null +++ b/infra/terraform/environments/staging/.gitignore @@ -0,0 +1 @@ +.terraform/ diff --git a/infra/terraform/environments/staging/.terraform.lock.hcl b/infra/terraform/environments/staging/.terraform.lock.hcl new file mode 100644 index 0000000..8ae2b79 --- /dev/null +++ b/infra/terraform/environments/staging/.terraform.lock.hcl @@ -0,0 +1,45 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. + +provider "registry.terraform.io/hashicorp/aws" { + version = "5.100.0" + constraints = "~> 5.0" + hashes = [ + "h1:Ijt7pOlB7Tr7maGQIqtsLFbl7pSMIj06TVdkoSBcYOw=", + "zh:054b8dd49f0549c9a7cc27d159e45327b7b65cf404da5e5a20da154b90b8a644", + "zh:0b97bf8d5e03d15d83cc40b0530a1f84b459354939ba6f135a0086c20ebbe6b2", + "zh:1589a2266af699cbd5d80737a0fe02e54ec9cf2ca54e7e00ac51c7359056f274", + "zh:6330766f1d85f01ae6ea90d1b214b8b74cc8c1badc4696b165b36ddd4cc15f7b", + "zh:7c8c2e30d8e55291b86fcb64bdf6c25489d538688545eb48fd74ad622e5d3862", + "zh:99b1003bd9bd32ee323544da897148f46a527f622dc3971af63ea3e251596342", + "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", + "zh:9f8b909d3ec50ade83c8062290378b1ec553edef6a447c56dadc01a99f4eaa93", + "zh:aaef921ff9aabaf8b1869a86d692ebd24fbd4e12c21205034bb679b9caf883a2", + "zh:ac882313207aba00dd5a76dbd572a0ddc818bb9cbf5c9d61b28fe30efaec951e", + "zh:bb64e8aff37becab373a1a0cc1080990785304141af42ed6aa3dd4913b000421", + "zh:dfe495f6621df5540d9c92ad40b8067376350b005c637ea6efac5dc15028add4", + "zh:f0ddf0eaf052766cfe09dea8200a946519f653c384ab4336e2a4a64fdd6310e9", + "zh:f1b7e684f4c7ae1eed272b6de7d2049bb87a0275cb04dbb7cda6636f600699c9", + "zh:ff461571e3f233699bf690db319dfe46aec75e58726636a0d97dd9ac6e32fb70", + ] +} + +provider "registry.terraform.io/hashicorp/tls" { + version = "4.2.1" + constraints = "~> 4.0" + hashes = [ + "h1:akFNuHwvrtnYMBofieoeXhPJDhYZzJVu/Q/BgZK2fgg=", + "zh:0d1e7d07ac973b97fa228f46596c800de830820506ee145626f079dd6bbf8d8a", + "zh:5c7e3d4348cb4861ab812973ef493814a4b224bdd3e9d534a7c8a7c992382b86", + "zh:7c6d4a86cd7a4e9c1025c6b3a3a6a45dea202af85d870cddbab455fb1bd568ad", + "zh:7d0864755ba093664c4b2c07c045d3f5e3d7c799dda1a3ef33d17ed1ac563191", + "zh:83734f57950ab67c0d6a87babdb3f13c908cbe0a48949333f489698532e1391b", + "zh:951e3c285218ebca0cf20eaa4265020b4ef042fea9c6ade115ad1558cfe459e5", + "zh:b9543955b4297e1d93b85900854891c0e645d936d8285a190030475379c5c635", + "zh:bb1bd9e86c003d08c30c1b00d44118ed5bbbf6b1d2d6f7eaac4fa5c6ebea5933", + "zh:c9477bfe00653629cd77ddac3968475f7ad93ac3ca8bc45b56d1d9efb25e4a6e", + "zh:d4cfda8687f736d0cba664c22ec49dae1188289e214ef57f5afe6a7217854fed", + "zh:dc77ee066cf96532a48f0578c35b1eaf6dc4d8ddd0e3ae8e029a3b10676dd5d3", + "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + ] +} diff --git a/infra/terraform/environments/staging/main.tf b/infra/terraform/environments/staging/main.tf new file mode 100644 index 0000000..84434c4 --- /dev/null +++ b/infra/terraform/environments/staging/main.tf @@ -0,0 +1,79 @@ +module "vpc" { + source = "../../modules/vpc" + project = var.project + environment = "staging" + vpc_cidr = "10.10.0.0/16" + availability_zones = ["us-east-1a", "us-east-1b"] +} + +module "eks" { + source = "../../modules/eks" + project = var.project + environment = "staging" + private_subnet_ids = module.vpc.private_subnet_ids + instance_type = "t3.large" + desired_nodes = 2 +} + +module "rds" { + source = "../../modules/rds" + project = var.project + environment = "staging" + vpc_id = module.vpc.vpc_id + vpc_cidr = "10.10.0.0/16" + private_subnet_ids = module.vpc.private_subnet_ids + instance_class = "db.t3.medium" + db_password = var.db_password +} + +module "elasticache" { + source = "../../modules/elasticache" + project = var.project + environment = "staging" + vpc_id = module.vpc.vpc_id + vpc_cidr = "10.10.0.0/16" + private_subnet_ids = module.vpc.private_subnet_ids + node_type = "cache.t3.small" +} + +module "msk" { + source = "../../modules/msk" + project = var.project + environment = "staging" + vpc_id = module.vpc.vpc_id + vpc_cidr = "10.10.0.0/16" + private_subnet_ids = module.vpc.private_subnet_ids + instance_type = "kafka.t3.small" +} + +module "dynamodb" { + source = "../../modules/dynamodb" + project = var.project + environment = "staging" + create_terraform_lock_table = false +} + +module "secrets_manager" { + source = "../../modules/secrets_manager" + project = var.project + environment = "staging" +} + +module "iam_irsa" { + source = "../../modules/iam_irsa" + project = var.project + environment = "staging" + oidc_issuer_url = module.eks.oidc_issuer_url + oidc_provider_arn = module.eks.oidc_provider_arn + k8s_namespace = "grainguard-staging" + secrets_read_policy_arn = module.secrets_manager.secrets_read_policy_arn + dynamodb_table_arns = module.dynamodb.all_table_arns +} + +module "ecr" { + source = "../../modules/ecr" + project = var.project + environment = "staging" + eks_node_role_arn = module.eks.node_role_arn + ci_role_arn = module.iam_irsa.ci_push_role_arn +} diff --git a/infra/terraform/environments/staging/outputs.tf b/infra/terraform/environments/staging/outputs.tf new file mode 100644 index 0000000..d8a7aa0 --- /dev/null +++ b/infra/terraform/environments/staging/outputs.tf @@ -0,0 +1,17 @@ +output "eks_cluster_name" { value = module.eks.cluster_name } +output "eks_cluster_endpoint" { value = module.eks.cluster_endpoint } +output "rds_endpoint" { value = module.rds.endpoint } +output "redis_endpoint" { value = module.elasticache.primary_endpoint } +output "kafka_brokers_tls" { value = module.msk.bootstrap_brokers_tls } +output "kafka_brokers_sasl" { value = module.msk.bootstrap_brokers_sasl } +output "ecr_repository_urls" { value = module.ecr.repository_urls } +output "service_role_arns" { value = module.iam_irsa.service_role_arns } +output "secret_names" { value = module.secrets_manager.secret_names } +output "dynamodb_tables" { + value = { + feature_flags = module.dynamodb.feature_flags_table_name + idempotency_keys = module.dynamodb.idempotency_keys_table_name + rate_counters = module.dynamodb.rate_counters_table_name + webhook_retry_state = module.dynamodb.webhook_retry_state_table_name + } +} diff --git a/infra/terraform/environments/staging/providers.tf b/infra/terraform/environments/staging/providers.tf new file mode 100644 index 0000000..169532d --- /dev/null +++ b/infra/terraform/environments/staging/providers.tf @@ -0,0 +1,25 @@ +terraform { + required_version = ">= 1.5.0" + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + tls = { + source = "hashicorp/tls" + version = "~> 4.0" + } + } + + backend "s3" { + bucket = "grainguard-terraform-state" + key = "environments/staging/terraform.tfstate" + region = "us-east-1" + dynamodb_table = "grainguard-terraform-locks" + encrypt = true + } +} + +provider "aws" { + region = var.aws_region +} diff --git a/infra/terraform/environments/staging/variables.tf b/infra/terraform/environments/staging/variables.tf new file mode 100644 index 0000000..3844676 --- /dev/null +++ b/infra/terraform/environments/staging/variables.tf @@ -0,0 +1,14 @@ +variable "project" { + type = string + default = "grainguard" +} + +variable "aws_region" { + type = string + default = "us-east-1" +} + +variable "db_password" { + type = string + sensitive = true +} diff --git a/k8s/argocd/apps/grainguard-staging.yaml b/k8s/argocd/apps/grainguard-staging.yaml new file mode 100644 index 0000000..5d19211 --- /dev/null +++ b/k8s/argocd/apps/grainguard-staging.yaml @@ -0,0 +1,36 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: grainguard-staging + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io + labels: + environment: staging +spec: + project: grainguard + source: + repoURL: https://github.com/pahuldeepp/GrainGuard-.git + targetRevision: HEAD + path: k8s/helm/grainguard + helm: + valueFiles: + - values.yaml + - values-staging.yaml + destination: + server: https://kubernetes.default.svc + namespace: grainguard-staging + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true + - ServerSideApply=true + retry: + limit: 4 + backoff: + duration: 8s + factor: 2 + maxDuration: 2m + revisionHistoryLimit: 7 diff --git a/k8s/argocd/project.yaml b/k8s/argocd/project.yaml index 2a2e66c..35d8243 100644 --- a/k8s/argocd/project.yaml +++ b/k8s/argocd/project.yaml @@ -10,6 +10,8 @@ spec: destinations: - namespace: grainguard-dev server: https://kubernetes.default.svc + - namespace: grainguard-staging + server: https://kubernetes.default.svc - namespace: grainguard-prod server: https://kubernetes.default.svc - namespace: argocd @@ -31,3 +33,8 @@ spec: policies: - p, proj:grainguard:prod-deployer, applications, sync, grainguard/grainguard-prod, allow - p, proj:grainguard:prod-deployer, applications, get, grainguard/grainguard-prod, allow + - name: staging-deployer + description: Can sync staging for pre-production validation + policies: + - p, proj:grainguard:staging-deployer, applications, sync, grainguard/grainguard-staging, allow + - p, proj:grainguard:staging-deployer, applications, get, grainguard/grainguard-staging, allow diff --git a/k8s/helm/grainguard/values-dev.yaml b/k8s/helm/grainguard/values-dev.yaml index 73acce5..b3183d3 100644 --- a/k8s/helm/grainguard/values-dev.yaml +++ b/k8s/helm/grainguard/values-dev.yaml @@ -1,3 +1,5 @@ +namespace: grainguard-dev + # Dev environment overrides — lower replicas, no autoscaling image: tag: latest diff --git a/k8s/helm/grainguard/values-prod.yaml b/k8s/helm/grainguard/values-prod.yaml index b6ad562..23817dd 100644 --- a/k8s/helm/grainguard/values-prod.yaml +++ b/k8s/helm/grainguard/values-prod.yaml @@ -1,3 +1,5 @@ +namespace: grainguard-prod + # Prod environment overrides — higher replicas, aggressive autoscaling image: pullPolicy: Always diff --git a/k8s/helm/grainguard/values-staging.yaml b/k8s/helm/grainguard/values-staging.yaml new file mode 100644 index 0000000..237592a --- /dev/null +++ b/k8s/helm/grainguard/values-staging.yaml @@ -0,0 +1,87 @@ +namespace: grainguard-staging + +# Staging environment overrides — production-like topology with slightly +# reduced capacity so we can validate rollouts, ingress, and restores safely. +image: + pullPolicy: Always + +services: + gateway: + replicaCount: 2 + autoscaling: + enabled: true + minReplicas: 2 + maxReplicas: 8 + + bff: + replicaCount: 2 + autoscaling: + enabled: true + minReplicas: 2 + maxReplicas: 6 + + telemetry-service: + replicaCount: 2 + autoscaling: + enabled: true + minReplicas: 2 + maxReplicas: 8 + targetCPUUtilizationPercentage: 55 + + saga-orchestrator: + replicaCount: 1 + autoscaling: + enabled: true + minReplicas: 1 + maxReplicas: 4 + + read-model-builder: + replicaCount: 2 + autoscaling: + enabled: true + minReplicas: 2 + maxReplicas: 6 + + asset-registry: + replicaCount: 1 + autoscaling: + enabled: true + minReplicas: 1 + maxReplicas: 4 + + risk-engine: + replicaCount: 2 + autoscaling: + enabled: true + minReplicas: 2 + maxReplicas: 6 + targetCPUUtilizationPercentage: 55 + + search-indexer: + replicaCount: 1 + autoscaling: + enabled: true + minReplicas: 1 + maxReplicas: 4 + + workflow-alerts: + replicaCount: 1 + autoscaling: + enabled: true + minReplicas: 1 + maxReplicas: 4 + + jobs-worker: + replicaCount: 1 + autoscaling: + enabled: true + minReplicas: 1 + maxReplicas: 4 + + cassandra-writer: + replicaCount: 2 + autoscaling: + enabled: true + minReplicas: 2 + maxReplicas: 6 + targetCPUUtilizationPercentage: 55 diff --git a/scripts/load-tests/graphql-stress.js b/scripts/load-tests/graphql-stress.js new file mode 100644 index 0000000..cd1f34a --- /dev/null +++ b/scripts/load-tests/graphql-stress.js @@ -0,0 +1,59 @@ +import http from "k6/http"; +import { check, sleep } from "k6"; +import { Rate, Trend } from "k6/metrics"; + +const errorRate = new Rate("errors"); +const gatewayLatency = new Trend("gateway_graphql_latency"); + +export const options = { + stages: [ + { duration: "15s", target: 10 }, + { duration: "30s", target: 30 }, + { duration: "20s", target: 60 }, + { duration: "15s", target: 0 }, + ], + thresholds: { + http_req_failed: ["rate<0.05"], + checks: ["rate>0.95"], + gateway_graphql_latency: ["p(95)<300"], + }, +}; + +const GATEWAY_URL = __ENV.GATEWAY_URL || "http://localhost:8086"; +const GRAPHQL_BODY = JSON.stringify({ + query: + "query { devices(limit: 20) { deviceId serialNumber temperature humidity version } }", +}); +const HEADERS = { "Content-Type": "application/json" }; + +export default function () { + const response = http.post(`${GATEWAY_URL}/graphql`, GRAPHQL_BODY, { + headers: HEADERS, + }); + + gatewayLatency.add(response.timings.duration); + const ok = check(response, { + "gateway graphql 200": (r) => r.status === 200, + "gateway graphql devices": (r) => { + try { + const devices = JSON.parse(r.body).data.devices; + return Array.isArray(devices); + } catch { + return false; + } + }, + }); + + errorRate.add(!ok); + sleep(0.2); +} + +export function handleSummary(data) { + return { + "scripts/load-tests/results/graphql-stress-summary.json": JSON.stringify( + data, + null, + 2 + ), + }; +} diff --git a/scripts/load-tests/ingest-stress.js b/scripts/load-tests/ingest-stress.js new file mode 100644 index 0000000..1b1cc0b --- /dev/null +++ b/scripts/load-tests/ingest-stress.js @@ -0,0 +1,79 @@ +import http from "k6/http"; +import { check, sleep } from "k6"; +import { Rate, Trend } from "k6/metrics"; + +const errorRate = new Rate("errors"); +const ingestLatency = new Trend("ingest_latency"); + +export const options = { + stages: [ + { duration: "15s", target: 20 }, + { duration: "25s", target: 60 }, + { duration: "20s", target: 120 }, + { duration: "15s", target: 0 }, + ], + thresholds: { + http_req_failed: ["rate<0.05"], + checks: ["rate>0.95"], + ingest_latency: ["p(95)<500"], + }, +}; + +const INGEST_URL = __ENV.INGEST_URL || "http://localhost:3001"; +const API_KEY = __ENV.INGEST_API_KEY || ""; +const DEVICE_IDS = (__ENV.DEVICE_IDS || "") + .split(",") + .map((value) => value.trim()) + .filter(Boolean); + +if (!API_KEY) { + throw new Error("INGEST_API_KEY is required"); +} + +if (DEVICE_IDS.length === 0) { + throw new Error("DEVICE_IDS must contain at least one UUID"); +} + +function buildPayload(deviceId) { + return JSON.stringify({ + serialNumber: deviceId, + temperature: 20 + Math.random() * 15, + humidity: 35 + Math.random() * 35, + timestamp: new Date().toISOString(), + }); +} + +export default function () { + const deviceId = DEVICE_IDS[__ITER % DEVICE_IDS.length]; + const response = http.post(`${INGEST_URL}/ingest`, buildPayload(deviceId), { + headers: { + "Content-Type": "application/json", + "X-Api-Key": API_KEY, + }, + }); + + ingestLatency.add(response.timings.duration); + const ok = check(response, { + "ingest accepted": (r) => r.status === 202, + "ingest acknowledged": (r) => { + try { + return JSON.parse(r.body).accepted === true; + } catch { + return false; + } + }, + }); + + errorRate.add(!ok); + sleep(0.1); +} + +export function handleSummary(data) { + return { + "scripts/load-tests/results/ingest-stress-summary.json": JSON.stringify( + data, + null, + 2 + ), + }; +} diff --git a/scripts/load-tests/mixed-stack-stress.js b/scripts/load-tests/mixed-stack-stress.js new file mode 100644 index 0000000..9219945 --- /dev/null +++ b/scripts/load-tests/mixed-stack-stress.js @@ -0,0 +1,127 @@ +import http from "k6/http"; +import { check, sleep } from "k6"; +import { Rate, Trend } from "k6/metrics"; + +const errorRate = new Rate("errors"); +const gatewayLatency = new Trend("gateway_graphql_latency"); +const ingestLatency = new Trend("ingest_latency"); + +export const options = { + scenarios: { + graphql_readers: { + executor: "ramping-vus", + exec: "graphqlReader", + startVUs: 0, + stages: [ + { duration: "15s", target: 10 }, + { duration: "25s", target: 30 }, + { duration: "20s", target: 50 }, + { duration: "15s", target: 0 }, + ], + }, + ingest_writers: { + executor: "ramping-vus", + exec: "ingestWriter", + startVUs: 0, + stages: [ + { duration: "15s", target: 20 }, + { duration: "25s", target: 50 }, + { duration: "20s", target: 80 }, + { duration: "15s", target: 0 }, + ], + startTime: "5s", + }, + }, + thresholds: { + http_req_failed: ["rate<0.08"], + checks: ["rate>0.92"], + gateway_graphql_latency: ["p(95)<400"], + ingest_latency: ["p(95)<600"], + }, +}; + +const GATEWAY_URL = __ENV.GATEWAY_URL || "http://localhost:8086"; +const INGEST_URL = __ENV.INGEST_URL || "http://localhost:3001"; +const API_KEY = __ENV.INGEST_API_KEY || ""; +const DEVICE_IDS = (__ENV.DEVICE_IDS || "") + .split(",") + .map((value) => value.trim()) + .filter(Boolean); +const GRAPHQL_BODY = JSON.stringify({ + query: + "query { devices(limit: 20) { deviceId serialNumber temperature humidity version } }", +}); + +if (!API_KEY) { + throw new Error("INGEST_API_KEY is required"); +} + +if (DEVICE_IDS.length === 0) { + throw new Error("DEVICE_IDS must contain at least one UUID"); +} + +function buildPayload(deviceId) { + return JSON.stringify({ + serialNumber: deviceId, + temperature: 20 + Math.random() * 15, + humidity: 35 + Math.random() * 35, + timestamp: new Date().toISOString(), + }); +} + +export function graphqlReader() { + const response = http.post(`${GATEWAY_URL}/graphql`, GRAPHQL_BODY, { + headers: { "Content-Type": "application/json" }, + }); + + gatewayLatency.add(response.timings.duration); + const ok = check(response, { + "gateway graphql 200": (r) => r.status === 200, + "gateway graphql devices": (r) => { + try { + const devices = JSON.parse(r.body).data.devices; + return Array.isArray(devices); + } catch { + return false; + } + }, + }); + + errorRate.add(!ok); + sleep(0.2); +} + +export function ingestWriter() { + const deviceId = DEVICE_IDS[__ITER % DEVICE_IDS.length]; + const response = http.post(`${INGEST_URL}/ingest`, buildPayload(deviceId), { + headers: { + "Content-Type": "application/json", + "X-Api-Key": API_KEY, + }, + }); + + ingestLatency.add(response.timings.duration); + const ok = check(response, { + "ingest accepted": (r) => r.status === 202, + "ingest acknowledged": (r) => { + try { + return JSON.parse(r.body).accepted === true; + } catch { + return false; + } + }, + }); + + errorRate.add(!ok); + sleep(0.1); +} + +export function handleSummary(data) { + return { + "scripts/load-tests/results/mixed-stack-stress-summary.json": JSON.stringify( + data, + null, + 2 + ), + }; +} From 3c14d5c4babb79f37b3e10c38880793b212dc450 Mon Sep 17 00:00:00 2001 From: Pahuldeep Singh Date: Sun, 29 Mar 2026 14:47:36 -0500 Subject: [PATCH 02/19] fix(dashboard): satisfy settings export lint --- apps/dashboard/src/features/settings/SettingsPage.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/dashboard/src/features/settings/SettingsPage.tsx b/apps/dashboard/src/features/settings/SettingsPage.tsx index cfeb216..b5d7da5 100644 --- a/apps/dashboard/src/features/settings/SettingsPage.tsx +++ b/apps/dashboard/src/features/settings/SettingsPage.tsx @@ -114,7 +114,7 @@ export function SettingsPage() { const a = document.createElement("a"); a.href = url; const disposition = res.headers.get("content-disposition") ?? ""; - const filenameMatch = disposition.match(/filename=\"?([^"]+)\"?/i); + const filenameMatch = disposition.match(/filename="?([^"]+)"?/i); a.download = filenameMatch?.[1] ?? `grainguard-export-${new Date().toISOString().slice(0, 10)}.json`; a.click(); URL.revokeObjectURL(url); From bd8768f64417a5dca6c4f9ae8da22b681df2e73b Mon Sep 17 00:00:00 2001 From: Pahuldeep Singh Date: Sun, 29 Mar 2026 14:55:30 -0500 Subject: [PATCH 03/19] fix(perf): use k6-compatible catch syntax --- scripts/load-tests/performance-budget.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/load-tests/performance-budget.js b/scripts/load-tests/performance-budget.js index f7aeedf..eade28e 100644 --- a/scripts/load-tests/performance-budget.js +++ b/scripts/load-tests/performance-budget.js @@ -82,7 +82,7 @@ function hasGraphqlErrors(response) { try { const errors = response.json("errors"); return Array.isArray(errors) && errors.length > 0; - } catch { + } catch (error) { return true; } } From 4b00e4bd51467459f28b5640dbbfc928039b0eb0 Mon Sep 17 00:00:00 2001 From: Pahuldeep Singh Date: Sun, 29 Mar 2026 14:57:54 -0500 Subject: [PATCH 04/19] fix(security): make account route rate limits explicit --- apps/gateway/src/routes/account.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/gateway/src/routes/account.ts b/apps/gateway/src/routes/account.ts index df24e19..46cf713 100644 --- a/apps/gateway/src/routes/account.ts +++ b/apps/gateway/src/routes/account.ts @@ -46,6 +46,7 @@ accountRouter.get( // If the user is the last admin, the entire tenant is deleted. accountRouter.delete( "/account/me", + apiRateLimiter, authMiddleware, async (req: Request, res: Response) => { const tenantId = req.user!.tenantId; @@ -131,6 +132,7 @@ accountRouter.delete( // GDPR Article 20 — Right to Data Portability. Returns all user data as JSON. accountRouter.get( "/account/export", + apiRateLimiter, authMiddleware, async (req: Request, res: Response) => { try { From df1c8c508c128e89cc07e1fcf43434c09e92d049 Mon Sep 17 00:00:00 2001 From: Pahuldeep Singh Date: Sun, 29 Mar 2026 15:34:55 -0500 Subject: [PATCH 05/19] fix(staging): unblock e2e and perf validation --- .github/workflows/perf.yml | 3 +- .../src/features/billing/BillingPage.tsx | 45 +++++++------ .../components/RegisterDeviceModal.tsx | 20 ++++-- .../providers/Auth0ProviderWithNavigate.tsx | 48 +++++++++++++- apps/gateway/src/server.ts | 4 +- infra/terraform/modules/msk/main.tf | 6 +- scripts/load-tests/performance-budget.js | 27 ++++++-- tests/e2e/auth.spec.ts | 6 +- tests/e2e/billing.spec.ts | 6 +- tests/e2e/devices.spec.ts | 15 ++--- tests/e2e/fixtures/mockAuth.ts | 64 +++++++++++++++---- 11 files changed, 180 insertions(+), 64 deletions(-) diff --git a/.github/workflows/perf.yml b/.github/workflows/perf.yml index a7b0e9c..d114405 100644 --- a/.github/workflows/perf.yml +++ b/.github/workflows/perf.yml @@ -69,6 +69,8 @@ jobs: working-directory: apps/gateway env: PORT: 3000 + BFF_HOST: localhost + BFF_PORT: "4000" NODE_ENV: test AUTH_ENABLED: "false" DATABASE_URL: postgres://grainguard:grainguard@localhost:5432/grainguard @@ -168,7 +170,6 @@ jobs: --env BFF_URL=http://localhost:4000 \ --env GATEWAY_AUTH_DISABLED=true \ --env BFF_AUTH_DISABLED=true \ - --env JWT=dummy-jwt \ --env TEST_DEVICE_ID=00000000-0000-0000-0000-000000000001 \ scripts/load-tests/performance-budget.js diff --git a/apps/dashboard/src/features/billing/BillingPage.tsx b/apps/dashboard/src/features/billing/BillingPage.tsx index 38c6602..98f5ecf 100644 --- a/apps/dashboard/src/features/billing/BillingPage.tsx +++ b/apps/dashboard/src/features/billing/BillingPage.tsx @@ -236,25 +236,32 @@ export function BillingPage() { ))} - + {plan.key === "enterprise" && !isCurrent ? ( + + Contact Sales + + ) : ( + + )} ); })} diff --git a/apps/dashboard/src/features/devices/components/RegisterDeviceModal.tsx b/apps/dashboard/src/features/devices/components/RegisterDeviceModal.tsx index aadddf6..5cb976e 100644 --- a/apps/dashboard/src/features/devices/components/RegisterDeviceModal.tsx +++ b/apps/dashboard/src/features/devices/components/RegisterDeviceModal.tsx @@ -33,6 +33,14 @@ function RegisterDeviceModalContent({ onClose, onRegistered }: Omit window.clearTimeout(focusTimer); }, [reset]); + useEffect(() => { + const onWindowKeyDown = (event: KeyboardEvent) => { + if (event.key === "Escape") onClose(); + }; + window.addEventListener("keydown", onWindowKeyDown); + return () => window.removeEventListener("keydown", onWindowKeyDown); + }, [onClose]); + const validate = (value: string): string | null => { if (!value.trim()) return "Serial number is required"; if (!SERIAL_REGEX.test(value.trim())) @@ -100,8 +108,9 @@ function RegisterDeviceModalContent({ onClose, onRegistered }: Omit { - setSerial(e.target.value); - setValidationError(null); + const nextSerial = e.target.value.toUpperCase(); + setSerial(nextSerial); + setValidationError(nextSerial.trim() ? validate(nextSerial) : null); }} placeholder="e.g. GG-SILO-001" className="w-full px-3 py-2 border rounded-lg text-sm bg-white dark:bg-gray-800 text-gray-900 dark:text-white placeholder-gray-400 focus:outline-none focus:ring-2 focus:ring-green-500 border-gray-300 dark:border-gray-700" @@ -110,7 +119,10 @@ function RegisterDeviceModalContent({ onClose, onRegistered }: Omit {(validationError || error) && ( -

+

{validationError || error}

)} @@ -130,7 +142,7 @@ function RegisterDeviceModalContent({ onClose, onRegistered }: Omit