pahuldeepp · pahuldeepp · Mar 22, 2026 · Mar 22, 2026 · Mar 22, 2026 · Mar 22, 2026
diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
@@ -59,7 +59,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@v5
         with:
-          go-version: "1.24"
+          go-version: "1.25"
           cache: true
 
       - name: Build & Vet (Go)
@@ -95,4 +95,4 @@ jobs:
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
           cache-from: type=gha
-          cache-to: type=gha,mode=max
+          cache-to: type=gha,mode=max
diff --git a/.github/workflows/chaos.yml b/.github/workflows/chaos.yml
@@ -0,0 +1,125 @@
+name: Chaos Tests
+
+on:
+  workflow_dispatch:
+    inputs:
+      experiment:
+        description: 'Experiment to run'
+        required: true
+        default: all
+        type: choice
+        options:
+          - all
+          - pod-kill
+          - kafka-consumer-pause
+          - redis-outage
+          - projection-lag
+          - network-partition
+      namespace:
+        description: 'Target namespace'
+        required: true
+        default: grainguard-dev
+  schedule:
+    # Run full suite every Saturday at 02:00 UTC (off-peak)
+    - cron: '0 2 * * 6'
+
+env:
+  NAMESPACE: ${{ github.event.inputs.namespace || 'grainguard-dev' }}
+
+jobs:
+  chaos:
+    name: Chaos — ${{ github.event.inputs.experiment || 'all' }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Configure kubectl
+        uses: azure/setup-kubectl@v3
+        with:
+          version: 'v1.29.0'
+
+      - name: Set kubeconfig
+        run: |
+          mkdir -p "$HOME/.kube"
+          echo "${{ secrets.KUBECONFIG_DEV }}" | base64 -d > "$HOME/.kube/config"
+          chmod 600 "$HOME/.kube/config"
+
+      - name: Install Chaos Toolkit
+        run: |
+          pip install --quiet \
+            chaostoolkit==1.19.0 \
+            chaostoolkit-kubernetes==0.26.4 \
+            chaostoolkit-verification==0.3.0
+
+      - name: Make scripts executable
+        run: chmod +x tests/chaos/*.sh
+
+      - name: Run — all experiments
+        if: ${{ github.event.inputs.experiment == 'all' || github.event_name == 'schedule' }}
+        env:
+          NAMESPACE: ${{ env.NAMESPACE }}
+          KAFKA_BOOTSTRAP: kafka:9092
+          GATEWAY_URL: ${{ secrets.CHAOS_GATEWAY_URL }}
+          PROMETHEUS_URL: ${{ secrets.CHAOS_PROMETHEUS_URL }}
+          TEST_JWT: ${{ secrets.CHAOS_TEST_JWT }}
+        run: bash tests/chaos/run-all.sh
+
+      - name: Run — pod-kill
+        if: ${{ github.event.inputs.experiment == 'pod-kill' }}
+        env:
+          NAMESPACE: ${{ env.NAMESPACE }}
+        run: chaos run tests/chaos/pod-kill.yaml
+
+      - name: Run — kafka-consumer-pause
+        if: ${{ github.event.inputs.experiment == 'kafka-consumer-pause' }}
+        env:
+          NAMESPACE: ${{ env.NAMESPACE }}
+          KAFKA_BOOTSTRAP: kafka:9092
+        run: bash tests/chaos/kafka-consumer-pause.sh
+
+      - name: Run — redis-outage
+        if: ${{ github.event.inputs.experiment == 'redis-outage' }}
+        env:
+          NAMESPACE: ${{ env.NAMESPACE }}
+          GATEWAY_URL: ${{ secrets.CHAOS_GATEWAY_URL }}
+          TEST_JWT: ${{ secrets.CHAOS_TEST_JWT }}
+        run: bash tests/chaos/redis-outage.sh
+
+      - name: Run — projection-lag
+        if: ${{ github.event.inputs.experiment == 'projection-lag' }}
+        env:
+          NAMESPACE: ${{ env.NAMESPACE }}
+          KAFKA_BOOTSTRAP: kafka:9092
+          PROMETHEUS_URL: ${{ secrets.CHAOS_PROMETHEUS_URL }}
+          STRICT_ALERT_CHECK: "1"
+        run: bash tests/chaos/projection-lag.sh
+
+      - name: Run — network-partition
+        if: ${{ github.event.inputs.experiment == 'network-partition' }}
+        env:
+          NAMESPACE: ${{ env.NAMESPACE }}
+        run: chaos run tests/chaos/network-partition.yaml
+
+      - name: Upload chaos logs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: chaos-results-${{ github.run_number }}
+          path: tests/chaos/results/
+          retention-days: 30
+          if-no-files-found: ignore
+
+      - name: Notify Slack on failure
+        if: failure()
+        uses: slackapi/slack-github-action@v1.26.0
+        with:
+          payload: |
+            {
+              "text": ":fire: Chaos experiment *${{ github.event.inputs.experiment || 'all' }}* FAILED on `${{ env.NAMESPACE }}` — <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View run>"
+            }
+        env:
+          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_CHAOS_WEBHOOK }}
+          SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -20,13 +20,13 @@ jobs:
 
       - uses: actions/setup-go@v5
         with:
-          go-version: "1.24"
+          go-version: "1.25"
           cache: true
 
       - name: golangci-lint
         uses: golangci/golangci-lint-action@v6
         with:
-          version: v1.62
+          version: v1.64.8
           args: --timeout=5m
 
   go-test:
@@ -37,7 +37,7 @@ jobs:
 
       - uses: actions/setup-go@v5
         with:
-          go-version: "1.24"
+          go-version: "1.25"
           cache: true
 
       - name: Download deps
@@ -79,7 +79,9 @@ jobs:
         working-directory: apps/${{ matrix.app }}
 
       - name: ESLint
-        run: npm run lint
+        # --if-present: npm exits 0 without running anything when the app's
+        # package.json defines no "lint" script, so such apps are skipped.
+        run: npm run lint --if-present
         working-directory: apps/${{ matrix.app }}
 
       - name: Typecheck

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
@@ -0,0 +1,74 @@
+name: E2E Tests
+
+on:
+  workflow_dispatch:
+  pull_request:
+    branches: [master]
+
+jobs:
+  e2e:
+    name: Playwright E2E
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+          cache: npm
+          cache-dependency-path: apps/dashboard/package-lock.json
+
+      - name: Install dashboard deps
+        run: npm ci
+        working-directory: apps/dashboard
+
+      - name: Install E2E deps
+        run: npm install --save-dev @playwright/test typescript ts-node
+        working-directory: tests/e2e
-      - name: Install E2E deps
-        run: npm install --save-dev @playwright/test typescript ts-node
-        working-directory: tests/e2e
+      - name: Install E2E deps
+        # The scoped package name is passed bare: inside backticks bash would
+        # try to execute it as a command and npm would never receive it.
+        run: npm install @playwright/test typescript ts-node
+        working-directory: tests/e2e
-      - name: Install E2E deps
-        run: npm install --save-dev @playwright/test typescript ts-node
-        working-directory: tests/e2e
+      - name: Install E2E deps
+        # The scoped package name is passed bare: inside backticks bash would
+        # try to execute it as a command and npm would never receive it.
+        run: npm install @playwright/test typescript ts-node
+        working-directory: tests/e2e
+
+      - name: Install Playwright browsers
+        run: npx playwright install --with-deps chromium firefox
+        working-directory: tests/e2e
+
+      - name: Build dashboard
+        run: npm run build
+        working-directory: apps/dashboard
+        env:
+          VITE_AUTH0_DOMAIN:    ${{ secrets.VITE_AUTH0_DOMAIN }}
+          VITE_AUTH0_CLIENT_ID: ${{ secrets.VITE_AUTH0_CLIENT_ID }}
+          VITE_AUTH0_AUDIENCE:  ${{ secrets.VITE_AUTH0_AUDIENCE }}
+          VITE_BFF_URL:         ${{ secrets.E2E_BFF_URL }}
+          VITE_GATEWAY_URL:     ${{ secrets.E2E_GATEWAY_URL }}
+
+      - name: Serve dashboard
+        run: npx serve -s dist -l 5173 &
+        working-directory: apps/dashboard
+
+      - name: Wait for server
+        run: npx wait-on http://localhost:5173 --timeout 30000
+
+      - name: Run Playwright tests
+        run: npx playwright test --config playwright.config.ts
+        working-directory: tests/e2e
+        env:
+          E2E_BASE_URL:         http://localhost:5173
+          VITE_AUTH0_CLIENT_ID: ${{ secrets.VITE_AUTH0_CLIENT_ID }}
+          VITE_AUTH0_AUDIENCE:  ${{ secrets.VITE_AUTH0_AUDIENCE }}
+
+      - name: Upload Playwright report
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: playwright-report-${{ github.run_number }}
+          path: tests/e2e/playwright-report/
+          retention-days: 14
+
+      - name: Upload test results (JUnit)
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: playwright-results-${{ github.run_number }}
+          path: tests/e2e/playwright-results.xml
-      - name: Upload test results (JUnit)
-        uses: actions/upload-artifact@v4
-        if: always()
-        with:
-          name: playwright-results-${{ github.run_number }}
-          path: tests/e2e/playwright-results.xml
+      - name: Upload test results (JUnit)
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: playwright-results-${{ github.run_number }}
+          path: tests/e2e/playwright-results.xml
+          retention-days: 14
+          if-no-files-found: ignore
-      - name: Upload test results (JUnit)
-        uses: actions/upload-artifact@v4
-        if: always()
-        with:
-          name: playwright-results-${{ github.run_number }}
-          path: tests/e2e/playwright-results.xml
+      - name: Upload test results (JUnit)
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: playwright-results-${{ github.run_number }}
+          path: tests/e2e/playwright-results.xml
+          retention-days: 14
+          if-no-files-found: ignore
diff --git a/.github/workflows/perf.yml b/.github/workflows/perf.yml
@@ -0,0 +1,139 @@
+name: Performance Budget
+
+on:
+  pull_request:
+    branches: [master]
+    paths:
+      - "apps/gateway/**"
+      - "apps/bff/**"
+      - "scripts/load-tests/**"
+
+jobs:
+  perf:
+    name: k6 Performance Budget
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+
+    services:
+      # Spin up the gateway and BFF as Docker Compose services
+      # so k6 can hit them without needing a live cluster
+      postgres:
+        image: postgres:16-alpine
+        ports:
+          - 5432:5432
+        env:
+          POSTGRES_USER:     grainguard
+          POSTGRES_PASSWORD: grainguard
+          POSTGRES_DB:       grainguard
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+
+      redis:
+        image: redis:7-alpine
+        ports:
+          - 6379:6379
+        options: >-
+          --health-cmd "redis-cli ping"
+          --health-interval 10s
+          --health-retries 5
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+          cache: npm
+          cache-dependency-path: apps/gateway/package-lock.json
-      - name: Set up Node.js
-        uses: actions/setup-node@v4
-        with:
-          node-version: "20"
-          cache: npm
-          cache-dependency-path: apps/gateway/package-lock.json
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+          cache: npm
+          cache-dependency-path: |
+            apps/gateway/package-lock.json
+            apps/bff/package-lock.json
-      - name: Set up Node.js
-        uses: actions/setup-node@v4
-        with:
-          node-version: "20"
-          cache: npm
-          cache-dependency-path: apps/gateway/package-lock.json
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+          cache: npm
+          cache-dependency-path: |
+            apps/gateway/package-lock.json
+            apps/bff/package-lock.json
+
+      - name: Install gateway deps
+        run: npm ci
+        working-directory: apps/gateway
+
+      - name: Install BFF deps
+        run: npm ci
+        working-directory: apps/bff
+
+      - name: Start gateway in background
+        run: npx ts-node src/server.ts &
+        working-directory: apps/gateway
+        env:
+          PORT:             3000
+          NODE_ENV:         development
+          AUTH_ENABLED:     "false"
+          DATABASE_URL:     postgres://grainguard:grainguard@localhost:5432/grainguard
+          REDIS_URL:        redis://localhost:6379
-        env:
-          PORT:             3000
-          NODE_ENV:         development
-          AUTH_ENABLED:     "false"
-          DATABASE_URL:     postgres://grainguard:grainguard@localhost:5432/grainguard
-          REDIS_URL:        redis://localhost:6379
+        env:
+          PORT:             3000
+          NODE_ENV:         development
+          AUTH_ENABLED:     "false"
+          READ_DATABASE_URL: postgres://grainguard:grainguard@localhost:5432/grainguard
+          WRITE_DATABASE_URL: postgres://grainguard:grainguard@localhost:5432/grainguard?sslmode=disable
+          REDIS_HOST: localhost
+          REDIS_PORT: 6379
-        env:
-          PORT:             3000
-          NODE_ENV:         development
-          AUTH_ENABLED:     "false"
-          DATABASE_URL:     postgres://grainguard:grainguard@localhost:5432/grainguard
-          REDIS_URL:        redis://localhost:6379
+        env:
+          PORT:             3000
+          NODE_ENV:         development
+          AUTH_ENABLED:     "false"
+          READ_DATABASE_URL: postgres://grainguard:grainguard@localhost:5432/grainguard
+          WRITE_DATABASE_URL: postgres://grainguard:grainguard@localhost:5432/grainguard?sslmode=disable
+          REDIS_HOST: localhost
+          REDIS_PORT: 6379
+          JWKS_URL:         ${{ secrets.PERF_JWKS_URL }}
+          JWT_ISSUER:       ${{ secrets.PERF_JWT_ISSUER }}
+          JWT_AUDIENCE:     ${{ secrets.PERF_JWT_AUDIENCE }}
+          ALLOWED_ORIGINS:  http://localhost:5173
+          STRIPE_SECRET_KEY:   sk_test_placeholder
+          STRIPE_WEBHOOK_SECRET: whsec_placeholder
+          STRIPE_PRICE_STARTER:       price_placeholder
+          STRIPE_PRICE_PROFESSIONAL:  price_placeholder
+          STRIPE_PRICE_ENTERPRISE:    price_placeholder
+          DASHBOARD_URL:    http://localhost:5173
+          AUTH0_DOMAIN:         placeholder.auth0.com
+          AUTH0_MANAGEMENT_CLIENT_ID: placeholder
+          AUTH0_MANAGEMENT_CLIENT_SECRET: placeholder
+
+      - name: Start BFF in background
+        run: npx ts-node src/server.ts &
+        working-directory: apps/bff
+        env:
+          PORT: 4000
+          NODE_ENV: development
+          AUTH_ENABLED: "false"
+          POSTGRES_HOST: localhost
+          POSTGRES_PORT: 5432
+          POSTGRES_USER: grainguard
+          POSTGRES_PASSWORD: grainguard
+          POSTGRES_DB: grainguard
-          POSTGRES_HOST: localhost
-          POSTGRES_PORT: 5432
-          POSTGRES_USER: grainguard
-          POSTGRES_PASSWORD: grainguard
-          POSTGRES_DB: grainguard
+          READ_DATABASE_URL: postgres://grainguard:grainguard@localhost:5432/grainguard
-          POSTGRES_HOST: localhost
-          POSTGRES_PORT: 5432
-          POSTGRES_USER: grainguard
-          POSTGRES_PASSWORD: grainguard
-          POSTGRES_DB: grainguard
+          READ_DATABASE_URL: postgres://grainguard:grainguard@localhost:5432/grainguard
+          REDIS_HOST: localhost
+          REDIS_PORT: 6379
+          ELASTICSEARCH_URL: http://localhost:9200
+          CASSANDRA_HOST: localhost
+          CASSANDRA_PORT: 9042
+          AUTH0_DOMAIN: placeholder.auth0.com
+          AUTH0_AUDIENCE: placeholder
+          AUTH0_ORG_CLAIM: org_id
+          JWKS_URL: https://example.invalid/.well-known/jwks.json
+          JWT_ISSUER: https://example.invalid/
+          JWT_AUDIENCE: placeholder
+          ALLOWED_ORIGINS: http://localhost:5173
+          JWT_SECRET: dev-secret
+
+      - name: Wait for gateway
+        run: npx wait-on tcp:3000 --timeout 30000
+
+      - name: Wait for BFF
+        run: npx wait-on tcp:4000 --timeout 30000
-      - name: Wait for gateway
-        continue-on-error: true
-        run: npx wait-on tcp:3000 --timeout 30000
-
-      - name: Wait for BFF
-        continue-on-error: true
-        run: npx wait-on tcp:4000 --timeout 30000
+      - name: Wait for gateway
+        run: npx wait-on tcp:3000 --timeout 30000
+
+      - name: Wait for BFF
+        run: npx wait-on tcp:4000 --timeout 30000
-      - name: Wait for gateway
-        continue-on-error: true
-        run: npx wait-on tcp:3000 --timeout 30000
-
-      - name: Wait for BFF
-        continue-on-error: true
-        run: npx wait-on tcp:4000 --timeout 30000
+      - name: Wait for gateway
+        run: npx wait-on tcp:3000 --timeout 30000
+
+      - name: Wait for BFF
+        run: npx wait-on tcp:4000 --timeout 30000
+
+      - name: Install k6
+        # curl flags: -f fails on HTTP errors instead of piping an error page
+        # into tar, -sS hides the progress meter but still prints errors,
+        # -L follows the release-asset redirect.
+        run: |
+          curl -fsSL https://github.com/grafana/k6/releases/download/v0.51.0/k6-v0.51.0-linux-amd64.tar.gz | tar xz
+          sudo mv k6-v0.51.0-linux-amd64/k6 /usr/local/bin/k6
+
+      - name: Run performance budget
+        continue-on-error: true
+        run: |
-      - name: Run performance budget
-        continue-on-error: true
-        run: |
+      - name: Run performance budget
+        run: |
-      - name: Run performance budget
-        continue-on-error: true
-        run: |
+      - name: Run performance budget
+        run: |
+          mkdir -p scripts/load-tests/results
+          k6 run \
+            --env GATEWAY_URL=http://localhost:3000 \
+            --env BFF_URL=http://localhost:4000 \
+            --env JWT=dev-ci-token \
+            scripts/load-tests/performance-budget.js
+        # k6 exits 99 if thresholds are breached — non-blocking until infra is stable
+
+      - name: Upload performance results
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: perf-results-${{ github.run_number }}
+          path: scripts/load-tests/results/
+          retention-days: 30
+          if-no-files-found: ignore