pahuldeepp · pahuldeepp · Mar 22, 2026 · Mar 22, 2026 · Mar 22, 2026 · Mar 22, 2026
diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
@@ -59,7 +59,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@v5
         with:
-          go-version: "1.24"
+          go-version: "1.25"
           cache: true
 
       - name: Build & Vet (Go)
@@ -95,4 +95,4 @@ jobs:
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
           cache-from: type=gha
-          cache-to: type=gha,mode=max
+          cache-to: type=gha,mode=max
diff --git a/.github/workflows/chaos.yml b/.github/workflows/chaos.yml
@@ -0,0 +1,136 @@
+name: Chaos Tests
+
+on:
+  workflow_dispatch:
+    inputs:
+      experiment:
+        description: 'Experiment to run'
+        required: true
+        default: all
+        type: choice
+        options:
+          - all
+          - pod-kill
+          - kafka-consumer-pause
+          - redis-outage
+          - projection-lag
+          - network-partition
+      namespace:
+        description: 'Target namespace'
+        required: true
+        default: grainguard-dev
+  schedule:
+    # Run full suite every Saturday at 02:00 UTC (off-peak)
+    - cron: '0 2 * * 6'
+
+env:
+  NAMESPACE: ${{ github.event.inputs.namespace || 'grainguard-dev' }}
+
+jobs:
+  chaos:
+    name: Chaos — ${{ github.event.inputs.experiment || 'all' }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Validate namespace allowlist
+        run: |
+          case "$NAMESPACE" in
+            grainguard-dev)
+              ;;
+            *)
+              echo "Unsupported namespace: $NAMESPACE"
+              exit 1
+              ;;
+          esac
+
+      - name: Configure kubectl
+        uses: azure/setup-kubectl@v3
+        with:
+          version: 'v1.29.0'
+
+      - name: Set kubeconfig
+        run: |
+          mkdir -p "$HOME/.kube"
+          echo "${{ secrets.KUBECONFIG_DEV }}" | base64 -d > "$HOME/.kube/config"
+          chmod 600 "$HOME/.kube/config"
+
+      - name: Install Chaos Toolkit
+        run: |
+          pip install --quiet \
+            chaostoolkit==1.19.0 \
+            chaostoolkit-kubernetes==0.26.4 \
+            chaostoolkit-verification==0.3.0
+
+      - name: Make scripts executable
+        run: chmod +x tests/chaos/*.sh
+
+      - name: Run — all experiments
+        if: ${{ github.event.inputs.experiment == 'all' || github.event_name == 'schedule' }}
+        env:
+          NAMESPACE: ${{ env.NAMESPACE }}
+          KAFKA_BOOTSTRAP: kafka:9092
+          GATEWAY_URL: ${{ secrets.CHAOS_GATEWAY_URL }}
+          PROMETHEUS_URL: ${{ secrets.CHAOS_PROMETHEUS_URL }}
+          TEST_JWT: ${{ secrets.CHAOS_TEST_JWT }}
+        run: bash tests/chaos/run-all.sh
+
+      - name: Run — pod-kill
+        if: ${{ github.event.inputs.experiment == 'pod-kill' }}
+        env:
+          NAMESPACE: ${{ env.NAMESPACE }}
+        run: chaos run tests/chaos/pod-kill.yaml
+
+      - name: Run — kafka-consumer-pause
+        if: ${{ github.event.inputs.experiment == 'kafka-consumer-pause' }}
+        env:
+          NAMESPACE: ${{ env.NAMESPACE }}
+          KAFKA_BOOTSTRAP: kafka:9092
+        run: bash tests/chaos/kafka-consumer-pause.sh
+
+      - name: Run — redis-outage
+        if: ${{ github.event.inputs.experiment == 'redis-outage' }}
+        env:
+          NAMESPACE: ${{ env.NAMESPACE }}
+          GATEWAY_URL: ${{ secrets.CHAOS_GATEWAY_URL }}
+          TEST_JWT: ${{ secrets.CHAOS_TEST_JWT }}
+        run: bash tests/chaos/redis-outage.sh
+
+      - name: Run — projection-lag
+        if: ${{ github.event.inputs.experiment == 'projection-lag' }}
+        env:
+          NAMESPACE: ${{ env.NAMESPACE }}
+          KAFKA_BOOTSTRAP: kafka:9092
+          PROMETHEUS_URL: ${{ secrets.CHAOS_PROMETHEUS_URL }}
+          STRICT_ALERT_CHECK: "1"
+        run: bash tests/chaos/projection-lag.sh
+
+      - name: Run — network-partition
+        if: ${{ github.event.inputs.experiment == 'network-partition' }}
+        env:
+          NAMESPACE: ${{ env.NAMESPACE }}
+        run: chaos run tests/chaos/network-partition.yaml
+
+      - name: Upload chaos logs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: chaos-results-${{ github.run_number }}
+          path: tests/chaos/results/
+          retention-days: 30
+          if-no-files-found: ignore
+
+      - name: Notify Slack on failure
+        if: failure()
+        uses: slackapi/slack-github-action@v1.26.0
+        with:
+          payload: |
+            {
+              "text": ":fire: Chaos experiment *${{ github.event.inputs.experiment || 'all' }}* FAILED on `${{ env.NAMESPACE }}` — <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View run>"
+            }
+        env:
+          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_CHAOS_WEBHOOK }}
+          SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -20,13 +20,13 @@ jobs:
 
       - uses: actions/setup-go@v5
         with:
-          go-version: "1.24"
+          go-version: "1.25"
           cache: true
 
       - name: golangci-lint
         uses: golangci/golangci-lint-action@v6
         with:
-          version: v1.62
+          version: v2.11
           args: --timeout=5m
 
   go-test:
@@ -37,7 +37,7 @@ jobs:
 
       - uses: actions/setup-go@v5
         with:
-          go-version: "1.24"
+          go-version: "1.25"
           cache: true
 
       - name: Download deps
@@ -79,7 +79,12 @@ jobs:
         working-directory: apps/${{ matrix.app }}
 
       - name: ESLint
-        run: npm run lint
+        run: |
+          if npm run | grep -qE '^[[:space:]]+lint'; then
+            npm run lint
+          else
+            echo "No lint script for ${{ matrix.app }}; skipping ESLint step"
+          fi
         working-directory: apps/${{ matrix.app }}
 
       - name: Typecheck

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
@@ -0,0 +1,77 @@
+name: E2E Tests
+
+on:
+  workflow_dispatch:
+  pull_request:
+    branches: [master]
+
+jobs:
+  e2e:
+    name: Playwright E2E
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+          cache: npm
+          cache-dependency-path: apps/dashboard/package-lock.json
+
+      - name: Install dashboard deps
+        run: npm ci
+        working-directory: apps/dashboard
+
+      - name: Install E2E deps
+        run: npm install --no-save @playwright/test typescript ts-node
+        working-directory: tests/e2e
-      - name: Install E2E deps
-        run: npm install --save-dev @playwright/test typescript ts-node
-        working-directory: tests/e2e
+      - name: Install E2E deps
+        run: npm install `@playwright/test` typescript ts-node
+        working-directory: tests/e2e
-      - name: Install E2E deps
-        run: npm install --save-dev @playwright/test typescript ts-node
-        working-directory: tests/e2e
+      - name: Install E2E deps
+        run: npm install `@playwright/test` typescript ts-node
+        working-directory: tests/e2e
+
+      - name: Install Playwright browsers
+        run: npx playwright install --with-deps chromium firefox
+        working-directory: tests/e2e
+
+      - name: Build dashboard
+        run: npm run build
+        working-directory: apps/dashboard
+        env:
+          VITE_E2E_MOCK_AUTH: "true"
+          VITE_AUTH0_DOMAIN: e2e.auth0.local
+          VITE_AUTH0_CLIENT_ID: e2e-client-id
+          VITE_AUTH0_AUDIENCE: https://api.grainguard.test
+          VITE_BFF_URL: http://localhost:5173/graphql
+          VITE_GATEWAY_URL: http://localhost:5173
+
+      - name: Serve dashboard
+        run: npx serve -s dist -l 5173 &
+        working-directory: apps/dashboard
+
+      - name: Wait for server
+        run: npx wait-on http://localhost:5173 --timeout 30000
+
+      - name: Run Playwright tests
+        run: npx playwright test --config playwright.config.ts
+        working-directory: tests/e2e
+        env:
+          E2E_BASE_URL: http://localhost:5173
+          VITE_AUTH0_CLIENT_ID: e2e-client-id
+          VITE_AUTH0_AUDIENCE: https://api.grainguard.test
+
+      - name: Upload Playwright report
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: playwright-report-${{ github.run_number }}
+          path: tests/e2e/playwright-report/
+          retention-days: 14
+
+      - name: Upload test results (JUnit)
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: playwright-results-${{ github.run_number }}
+          path: tests/e2e/playwright-results.xml
-      - name: Upload test results (JUnit)
-        uses: actions/upload-artifact@v4
-        if: always()
-        with:
-          name: playwright-results-${{ github.run_number }}
-          path: tests/e2e/playwright-results.xml
+      - name: Upload test results (JUnit)
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: playwright-results-${{ github.run_number }}
+          path: tests/e2e/playwright-results.xml
+          retention-days: 14
+          if-no-files-found: ignore
-      - name: Upload test results (JUnit)
-        uses: actions/upload-artifact@v4
-        if: always()
-        with:
-          name: playwright-results-${{ github.run_number }}
-          path: tests/e2e/playwright-results.xml
+      - name: Upload test results (JUnit)
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: playwright-results-${{ github.run_number }}
+          path: tests/e2e/playwright-results.xml
+          retention-days: 14
+          if-no-files-found: ignore
+          retention-days: 14
+          if-no-files-found: ignore