diff --git a/.github/workflows/e2e-flows.yml b/.github/workflows/e2e-flows.yml
new file mode 100644
index 0000000000..e3c2509b7c
--- /dev/null
+++ b/.github/workflows/e2e-flows.yml
@@ -0,0 +1,231 @@
+name: E2E Flow Tests
+
+# This workflow runs comprehensive E2E tests with real email accounts.
+# It's designed to run in a private fork where secrets are configured,
+# but the workflow file lives in the public repo for version control.
+#
+# To enable: Set the repository variable E2E_FLOWS_ENABLED=true
+# To disable: Remove the variable or set it to anything other than "true"
+
+on:
+  # Run on schedule (every 12 hours, at 00:00 and 12:00 UTC)
+  schedule:
+    - cron: "0 */12 * * *"
+
+  # Allow manual trigger with branch selection
+  workflow_dispatch:
+    inputs:
+      branch:
+        description: "Branch to test"
+        required: false
+        default: "main"
+      test_file:
+        description: "Specific test file (optional, e.g., full-reply-cycle)"
+        required: false
+        default: ""
+
+# Prevent concurrent runs to avoid test account conflicts
+concurrency:
+  group: e2e-flows
+  cancel-in-progress: false
+
+jobs:
+  check-enabled:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    outputs:
+      enabled: ${{ steps.check.outputs.enabled }}
+    steps:
+      - name: Check if E2E flows are enabled
+        id: check
+        run: |
+          if [ "${{ vars.E2E_FLOWS_ENABLED }}" = "true" ]; then
+            echo "enabled=true" >> $GITHUB_OUTPUT
+            echo "E2E flow tests are ENABLED"
+          else
+            echo "enabled=false" >> $GITHUB_OUTPUT
+            echo "E2E flow tests are DISABLED (set E2E_FLOWS_ENABLED=true to enable)"
+          fi
+
+  e2e-flows:
+    needs: check-enabled
+    if: needs.check-enabled.outputs.enabled == 'true'
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    permissions:
+      contents: read
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.branch || github.ref }}
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: "22"
+
+      - name: Install pnpm
+        uses: pnpm/action-setup@v2
+        with:
+          version: 8
+
+      - name: Get pnpm store directory
+        shell: bash
+        run: |
+          echo "STORE_PATH=$(pnpm store path --silent)" >> "$GITHUB_ENV"
+
+      - name: Setup pnpm cache
+        uses: actions/cache@v4
+        with:
+          path: ${{ env.STORE_PATH }}  # the directory `pnpm store path` reported in the previous step
+          key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }}
+          restore-keys: |
+            ${{ runner.os }}-pnpm-store-
+
+      - name: Install dependencies
+        run: pnpm install
+
+      - name: Install ngrok
+        run: |
+          curl -s https://ngrok-agent.s3.amazonaws.com/ngrok.asc | sudo tee /etc/apt/trusted.gpg.d/ngrok.asc >/dev/null
+          echo "deb https://ngrok-agent.s3.amazonaws.com buster main" | sudo tee /etc/apt/sources.list.d/ngrok.list
+          sudo apt-get update && sudo apt-get install ngrok
+
+      - name: Configure ngrok
+        run: ngrok config add-authtoken ${{ secrets.E2E_NGROK_AUTH_TOKEN }}
+
+      - name: Build app
+        run: pnpm -F inbox-zero-ai build
+        env:
+          DATABASE_URL: ${{ secrets.DATABASE_URL }}
+          SKIP_ENV_VALIDATION: "true"
+
+      - name: Start app server
+        run: |
+          cd apps/web
+          pnpm start &
+          # Wait for server to be ready
+          echo "Waiting for app server to start..."
+          SERVER_READY=false
+          for i in {1..30}; do
+            if curl -sf http://localhost:3000 > /dev/null 2>&1; then
+              echo "App server is ready"
+              SERVER_READY=true
+              break
+            fi
+            sleep 2
+          done
+          if [ "$SERVER_READY" != "true" ]; then
+            echo "ERROR: App server failed to start within 60 seconds"
+            exit 1
+          fi
+        env:
+          DATABASE_URL: ${{ secrets.DATABASE_URL }}
+          UPSTASH_REDIS_REST_URL: ${{ secrets.UPSTASH_REDIS_REST_URL }}
+          UPSTASH_REDIS_REST_TOKEN: ${{ secrets.UPSTASH_REDIS_REST_TOKEN }}
+          GOOGLE_CLIENT_ID: ${{ secrets.GOOGLE_CLIENT_ID }}
+          GOOGLE_CLIENT_SECRET: ${{ secrets.GOOGLE_CLIENT_SECRET }}
+          GOOGLE_PUBSUB_TOPIC_NAME: ${{ secrets.GOOGLE_PUBSUB_TOPIC_NAME }}
+          GOOGLE_PUBSUB_VERIFICATION_TOKEN: ${{ secrets.GOOGLE_PUBSUB_VERIFICATION_TOKEN }}
+          MICROSOFT_CLIENT_ID: ${{ secrets.MICROSOFT_CLIENT_ID }}
+          MICROSOFT_CLIENT_SECRET: ${{ secrets.MICROSOFT_CLIENT_SECRET }}
+          MICROSOFT_WEBHOOK_CLIENT_STATE: ${{ secrets.MICROSOFT_WEBHOOK_CLIENT_STATE }}
+          # AI provider secrets - configure whichever provider you use
+          DEFAULT_LLM_PROVIDER: ${{ secrets.DEFAULT_LLM_PROVIDER }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
+          AUTH_SECRET: ${{ secrets.AUTH_SECRET }}
+          EMAIL_ENCRYPT_SECRET: ${{ secrets.EMAIL_ENCRYPT_SECRET }}
+          EMAIL_ENCRYPT_SALT: ${{ secrets.EMAIL_ENCRYPT_SALT }}
+          INTERNAL_API_KEY: ${{ secrets.INTERNAL_API_KEY }}
+
+      - name: Start ngrok tunnel
+        run: |
+          ngrok http 3000 --log=stdout > ngrok.log 2>&1 &
+          sleep 5
+          # Read the public URL; curl -f and jq -e exit non-zero when no tunnel is up, failing the step (bash -e)
+          NGROK_URL=$(curl -sf http://localhost:4040/api/tunnels | jq -er '.tunnels[0].public_url')
+          echo "NGROK_URL=$NGROK_URL" >> "$GITHUB_ENV"
+          echo "Tunnel URL: $NGROK_URL"
+
+      - name: Run E2E Flow Tests
+        # test_file reaches the script as an environment variable rather than being
+        # expanded into the script text, so its value is passed as one argument and
+        # never parsed as shell. The :+ expansion omits the argument when it is empty.
+        run: |
+          pnpm -F inbox-zero-ai test-e2e:flows ${E2E_TEST_FILE:+"$E2E_TEST_FILE"}
+        env:
+          E2E_TEST_FILE: ${{ github.event.inputs.test_file }}
+          NEXT_PUBLIC_BASE_URL: ${{ env.NGROK_URL }}
+          # Test control
+          RUN_E2E_FLOW_TESTS: "true"
+          E2E_RUN_ID: ${{ github.run_id }}-${{ github.run_attempt }}
+
+          # E2E-specific: Test account emails
+          E2E_GMAIL_EMAIL: ${{ secrets.E2E_GMAIL_EMAIL }}
+          E2E_OUTLOOK_EMAIL: ${{ secrets.E2E_OUTLOOK_EMAIL }}
+
+          # Standard app secrets (reused from existing config)
+          DATABASE_URL: ${{ secrets.DATABASE_URL }}
+          UPSTASH_REDIS_REST_URL: ${{ secrets.UPSTASH_REDIS_REST_URL }}
+          UPSTASH_REDIS_REST_TOKEN: ${{ secrets.UPSTASH_REDIS_REST_TOKEN }}
+          GOOGLE_CLIENT_ID: ${{ secrets.GOOGLE_CLIENT_ID }}
+          GOOGLE_CLIENT_SECRET: ${{ secrets.GOOGLE_CLIENT_SECRET }}
+          GOOGLE_PUBSUB_TOPIC_NAME: ${{ secrets.GOOGLE_PUBSUB_TOPIC_NAME }}
+          GOOGLE_PUBSUB_VERIFICATION_TOKEN: ${{ secrets.GOOGLE_PUBSUB_VERIFICATION_TOKEN }}
+          MICROSOFT_CLIENT_ID: ${{ secrets.MICROSOFT_CLIENT_ID }}
+          MICROSOFT_CLIENT_SECRET: ${{ secrets.MICROSOFT_CLIENT_SECRET }}
+          MICROSOFT_WEBHOOK_CLIENT_STATE: ${{ secrets.MICROSOFT_WEBHOOK_CLIENT_STATE }}
+          # AI provider secrets - configure whichever provider you use
+          DEFAULT_LLM_PROVIDER: ${{ secrets.DEFAULT_LLM_PROVIDER }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
+          AUTH_SECRET: ${{ secrets.AUTH_SECRET }}
+          EMAIL_ENCRYPT_SECRET: ${{ secrets.EMAIL_ENCRYPT_SECRET }}
+          EMAIL_ENCRYPT_SALT: ${{ secrets.EMAIL_ENCRYPT_SALT }}
+          INTERNAL_API_KEY: ${{ secrets.INTERNAL_API_KEY }}
+
+      - name: Upload test logs on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: e2e-flow-logs-${{ github.run_id }}
+          path: |
+            apps/web/__tests__/e2e/flows/*.log
+            ngrok.log
+          retention-days: 7
+
+  notify-disabled:
+    needs: check-enabled
+    if: needs.check-enabled.outputs.enabled != 'true'
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - name: E2E flows disabled notice
+        run: |
+          echo "::notice::E2E flow tests are disabled. To enable, set the repository variable E2E_FLOWS_ENABLED=true"
+          echo ""
+          echo "Required secrets for E2E flow tests:"
+          echo ""
+          echo "E2E-specific secrets:"
+          echo "  - E2E_GMAIL_EMAIL: Gmail test account email"
+          echo "  - E2E_OUTLOOK_EMAIL: Outlook test account email"
+          echo "  - E2E_NGROK_AUTH_TOKEN: ngrok auth token for tunnel"
+          echo ""
+          echo "Standard app secrets (same as production):"
+          echo "  - DATABASE_URL, AUTH_SECRET, INTERNAL_API_KEY"
+          echo "  - EMAIL_ENCRYPT_SECRET, EMAIL_ENCRYPT_SALT"
+          echo "  - UPSTASH_REDIS_REST_URL, UPSTASH_REDIS_REST_TOKEN"
+          echo "  - GOOGLE_CLIENT_ID, GOOGLE_CLIENT_SECRET"
+          echo "  - GOOGLE_PUBSUB_TOPIC_NAME, GOOGLE_PUBSUB_VERIFICATION_TOKEN"
+          echo "  - MICROSOFT_CLIENT_ID, MICROSOFT_CLIENT_SECRET, MICROSOFT_WEBHOOK_CLIENT_STATE"
+          echo "  - AI provider secrets (one of: OPENAI_API_KEY, ANTHROPIC_API_KEY, GOOGLE_API_KEY, OPENROUTER_API_KEY)"
+          echo "  - DEFAULT_LLM_PROVIDER (optional, defaults to openai)"
diff --git a/apps/web/__tests__/e2e/flows/README.md b/apps/web/__tests__/e2e/flows/README.md
new file mode 100644
index 0000000000..a71331daf0
--- /dev/null
+++ b/apps/web/__tests__/e2e/flows/README.md
@@ -0,0 +1,184 @@
+# E2E Flow Tests
+
+End-to-end tests that verify complete email processing flows with real accounts, webhooks, and AI processing.
+
+## Overview
+
+These flow tests verify multi-step scenarios:
+
+- **Full Reply Cycle**: Gmail → Outlook → Rule Processing → Draft → Send → Reply Received
+- **Auto-Labeling**: Email classification and label application
+- **Outbound Tracking**: Sent message handling and reply tracking
+- **Draft Cleanup**: AI draft deletion when user sends manual reply
+
+## Setup
+
+### 1. Test Accounts
+
+You need two email accounts connected to your test database:
+
+1. **Gmail account** - Connected via OAuth with valid refresh token
+2. **Outlook account** - Connected via OAuth with valid refresh token
+
+The test setup automatically verifies premium status and creates default rules if missing.
+
+### 2. Required Secrets (GitHub Actions)
+
+Configure these secrets in your repository:
+
+**E2E-specific secrets:**
+
+| Secret | Description |
+|--------|-------------|
+| `E2E_GMAIL_EMAIL` | Gmail test account email |
+| `E2E_OUTLOOK_EMAIL` | Outlook test account email |
+| `E2E_NGROK_AUTH_TOKEN` | ngrok auth token for tunnel |
+
+**Standard app secrets** (same as production - see [environment-variables.md](/docs/hosting/environment-variables.md)):
+
+- `DATABASE_URL`, `AUTH_SECRET`, `INTERNAL_API_KEY`
+- `EMAIL_ENCRYPT_SECRET`, `EMAIL_ENCRYPT_SALT`
+- `UPSTASH_REDIS_REST_URL`, `UPSTASH_REDIS_REST_TOKEN`
+- `GOOGLE_CLIENT_ID`, `GOOGLE_CLIENT_SECRET`
+- `GOOGLE_PUBSUB_TOPIC_NAME`, `GOOGLE_PUBSUB_VERIFICATION_TOKEN`
+- `MICROSOFT_CLIENT_ID`, `MICROSOFT_CLIENT_SECRET`, `MICROSOFT_WEBHOOK_CLIENT_STATE`
+- AI provider secrets (one of: `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GOOGLE_API_KEY`, `OPENROUTER_API_KEY`, etc.)
+
+Also set the repository variable `E2E_FLOWS_ENABLED=true` to enable the workflow.
+
+### 3. Local Development
+
+For local testing, set the equivalent environment variables and run:
+
+```bash
+RUN_E2E_FLOW_TESTS=true pnpm test-e2e:flows
+```
+
+## Running Tests
+
+```bash
+# Run all flow tests (suites are skipped unless RUN_E2E_FLOW_TESTS=true is set)
+pnpm test-e2e:flows
+
+# Run specific test file
+pnpm test-e2e:flows full-reply-cycle
+
+# Run with verbose logging
+E2E_VERBOSE=true pnpm test-e2e:flows
+```
+
+## Test Structure
+
+```text
+flows/
+├── config.ts              # Configuration and environment
+├── setup.ts               # Global test setup (account verification, premium check)
+├── teardown.ts            # Global test teardown
+├── helpers/
+│   ├── accounts.ts        # Test account loading
+│   ├── polling.ts         # Wait for state changes
+│   ├── email.ts           # Send/receive helpers
+│   ├── webhook.ts         # Webhook subscription management
+│   └── logging.ts         # Debug logging
+├── full-reply-cycle.test.ts
+├── auto-labeling.test.ts
+├── outbound-tracking.test.ts
+└── draft-cleanup.test.ts
+```
+
+## Test Scenarios
+
+### Full Reply Cycle
+
+1. Gmail sends email to Outlook
+2. Outlook receives via webhook
+3. Rule matches and creates draft
+4. User sends the draft
+5. Gmail receives the reply
+6. Outbound handling cleans up
+
+### Auto-Labeling
+
+- Emails needing reply → labeled + draft created
+- FYI emails → labeled, no draft
+- Thank you emails → appropriate handling
+
+### Outbound Tracking
+
+- SENT folder webhook triggers
+- Reply tracking updates
+- No duplicate rule execution
+
+### Draft Cleanup
+
+- Draft deleted when user sends manual reply
+- DraftSendLog properly recorded
+- Multiple drafts in thread cleaned up
+
+## Debugging
+
+### Logs
+
+Tests output detailed logs with the run ID:
+
+```text
+[E2E-abc123] Step 1: Sending email from Gmail to Outlook
+[E2E-abc123] Email sent { messageId: "...", threadId: "..." }
+[E2E-abc123] Step 2: Waiting for Outlook to receive email
+```
+
+### Verbose Mode
+
+```bash
+E2E_VERBOSE=true pnpm test-e2e:flows
+```
+
+## Timeouts
+
+| Operation | Timeout |
+|-----------|---------|
+| Email delivery | 90s |
+| Webhook processing | 60s |
+| Full test cycle | 300s |
+| Polling interval | 3s |
+
+## Local Setup Guide
+
+### Quick Start
+
+```bash
+# 1. Run setup with a named config (won't overwrite your existing .env)
+npm run setup -- --name e2e
+
+# 2. Run database migrations with the E2E env
+cd apps/web
+pnpm prisma:migrate:e2e
+
+# 3. Start the dev server with E2E config
+pnpm dev:e2e
+
+# 4. OAuth your test accounts at http://localhost:3000
+#    - Sign in with your Gmail test account
+#    - Sign out and sign in with your Outlook test account
+
+# 5. Add test account emails to apps/web/.env.e2e:
+#    E2E_GMAIL_EMAIL="your-test@gmail.com"
+#    E2E_OUTLOOK_EMAIL="your-test@outlook.com"
+
+# 6. Run the tests (loads .env.e2e automatically)
+pnpm test-e2e:flows
+```
+
+## Troubleshooting
+
+### "No account found"
+
+Test accounts aren't in the database. Run `pnpm dev:e2e`, visit http://localhost:3000, and sign in with each account.
+
+### Token expired
+
+OAuth tokens may expire. Run `pnpm dev:e2e` and sign in again at http://localhost:3000.
+
+### Draft not created
+
+Check that an AI provider API key is configured. Rules are created automatically by the test setup.
diff --git a/apps/web/__tests__/e2e/flows/auto-labeling.test.ts b/apps/web/__tests__/e2e/flows/auto-labeling.test.ts
new file mode 100644
index 0000000000..cf8d4ae3dd
--- /dev/null
+++ b/apps/web/__tests__/e2e/flows/auto-labeling.test.ts
@@ -0,0 +1,297 @@
+/**
+ * E2E Flow Test: Auto-Labeling
+ *
+ * Tests that emails are correctly classified and labeled:
+ * - Emails needing reply get appropriate labels
+ * - FYI/informational emails don't trigger drafts
+ * - Labels are actually applied in the email provider
+ *
+ * Usage:
+ * RUN_E2E_FLOW_TESTS=true pnpm test-e2e:flows auto-labeling
+ */
+
+import { describe, test, expect, beforeAll, afterEach } from "vitest";
+import { shouldRunFlowTests, TIMEOUTS, getTestSubjectPrefix } from "./config";
+import { initializeFlowTests, setupFlowTest } from "./setup";
+import { generateTestSummary } from "./teardown";
+import { sendTestEmail, TEST_EMAIL_SCENARIOS } from "./helpers/email";
+import { waitForExecutedRule, waitForMessageInInbox } from "./helpers/polling";
+import { logStep, clearLogs } from "./helpers/logging";
+import type { TestAccount } from "./helpers/accounts";
+
+describe.skipIf(!shouldRunFlowTests())("Auto-Labeling", () => {
+  let gmail: TestAccount;
+  let outlook: TestAccount;
+  let testStartTime: number;
+
+  beforeAll(async () => {
+    await initializeFlowTests();
+    const accounts = await setupFlowTest();
+    gmail = accounts.gmail;
+    outlook = accounts.outlook;
+  }, TIMEOUTS.TEST_DEFAULT);
+
+  afterEach(async () => {
+    generateTestSummary("Auto-Labeling", testStartTime);
+    clearLogs();
+  });
+
+  test(
+    "should label email that needs reply and create draft",
+    async () => {
+      testStartTime = Date.now();
+      const scenario = TEST_EMAIL_SCENARIOS.NEEDS_REPLY;
+
+      // ========================================
+      // Send email that clearly needs a reply
+      // ========================================
+      logStep("Sending email that needs reply");
+
+      await sendTestEmail({
+        from: gmail,
+        to: outlook,
+        subject: scenario.subject,
+        body: scenario.body,
+      });
+
+      // Wait for delivery. NOTE(review): the match uses only the run-wide prefix — confirm it cannot return another test's email
+      const outlookMessage = await waitForMessageInInbox({
+        provider: outlook.emailProvider,
+        subjectContains: getTestSubjectPrefix(),
+        timeout: TIMEOUTS.EMAIL_DELIVERY,
+      });
+
+      // ========================================
+      // Wait for rule execution
+      // ========================================
+      logStep("Waiting for rule execution");
+
+      const executedRule = await waitForExecutedRule({
+        messageId: outlookMessage.messageId,
+        emailAccountId: outlook.id,
+        timeout: TIMEOUTS.WEBHOOK_PROCESSING,
+      });
+
+      expect(executedRule).toBeDefined();
+      expect(executedRule.status).toBe("APPLIED");
+
+      // ========================================
+      // Verify draft was created (needs reply = should draft)
+      // ========================================
+      logStep("Verifying draft action");
+
+      const draftAction = executedRule.actionItems.find(
+        (a) => a.type === "DRAFT_EMAIL",
+      );
+
+      // A needs-reply email must yield a DRAFT_EMAIL action that carries a draft id
+      expect(draftAction).toBeDefined();
+      expect(draftAction?.draftId).toBeTruthy();
+
+      logStep("Draft created for needs-reply email", {
+        draftId: draftAction?.draftId,
+      });
+
+      // ========================================
+      // Verify labels in email provider
+      // ========================================
+      logStep("Verifying labels in provider");
+
+      const message = await outlook.emailProvider.getMessage(
+        outlookMessage.messageId,
+      );
+
+      logStep("Message labels", { labels: message.labelIds });
+
+      // NOTE(review): labelIds are only logged above, never asserted. The check below is
+      // already implied by the draft-action assertion (a found action means a non-empty list).
+      expect(executedRule.actionItems.length).toBeGreaterThan(0);
+    },
+    TIMEOUTS.TEST_DEFAULT,
+  );
+
+  test(
+    "should label FYI email without creating draft",
+    async () => {
+      testStartTime = Date.now();
+      const scenario = TEST_EMAIL_SCENARIOS.FYI_ONLY;
+
+      // ========================================
+      // Send FYI/informational email
+      // ========================================
+      logStep("Sending FYI email");
+
+      await sendTestEmail({
+        from: gmail,
+        to: outlook,
+        subject: scenario.subject,
+        body: scenario.body,
+      });
+
+      // Wait for delivery. NOTE(review): the match uses only the run-wide prefix — confirm it cannot return another test's email
+      const outlookMessage = await waitForMessageInInbox({
+        provider: outlook.emailProvider,
+        subjectContains: getTestSubjectPrefix(),
+        timeout: TIMEOUTS.EMAIL_DELIVERY,
+      });
+
+      // ========================================
+      // Wait for rule execution
+      // ========================================
+      logStep("Waiting for rule execution");
+
+      const executedRule = await waitForExecutedRule({
+        messageId: outlookMessage.messageId,
+        emailAccountId: outlook.id,
+        timeout: TIMEOUTS.WEBHOOK_PROCESSING,
+      });
+
+      expect(executedRule).toBeDefined();
+
+      // ========================================
+      // Verify NO draft was created for FYI email
+      // ========================================
+      logStep("Verifying no draft for FYI email");
+
+      const draftAction = executedRule.actionItems.find(
+        (a) => a.type === "DRAFT_EMAIL" && a.draftId,
+      );
+
+      // FYI emails should NOT create drafts (a DRAFT_EMAIL action without a draftId is tolerated)
+      expect(draftAction).toBeUndefined();
+
+      logStep("Draft action result", {
+        hasDraft: false,
+      });
+
+      // ========================================
+      // Log labels for inspection — NOTE(review): no label is asserted yet
+      // ========================================
+      logStep("Verifying labels");
+
+      const message = await outlook.emailProvider.getMessage(
+        outlookMessage.messageId,
+      );
+
+      logStep("Message labels", { labels: message.labelIds });
+    },
+    TIMEOUTS.TEST_DEFAULT,
+  );
+
+  test(
+    "should handle thank you email appropriately",
+    async () => {
+      testStartTime = Date.now();
+      const scenario = TEST_EMAIL_SCENARIOS.THANK_YOU;
+
+      // ========================================
+      // Send thank you email
+      // ========================================
+      logStep("Sending thank you email");
+
+      await sendTestEmail({
+        from: gmail,
+        to: outlook,
+        subject: scenario.subject,
+        body: scenario.body,
+      });
+
+      // Wait for delivery. NOTE(review): the match uses only the run-wide prefix — confirm it cannot return another test's email
+      const outlookMessage = await waitForMessageInInbox({
+        provider: outlook.emailProvider,
+        subjectContains: getTestSubjectPrefix(),
+        timeout: TIMEOUTS.EMAIL_DELIVERY,
+      });
+
+      // ========================================
+      // Wait for rule execution
+      // ========================================
+      logStep("Waiting for rule execution");
+
+      const executedRule = await waitForExecutedRule({
+        messageId: outlookMessage.messageId,
+        emailAccountId: outlook.id,
+        timeout: TIMEOUTS.WEBHOOK_PROCESSING,
+      });
+
+      expect(executedRule).toBeDefined();
+
+      // ========================================
+      // Verify processing
+      // ========================================
+      logStep("Verifying thank you email processing");
+
+      // Thank you emails typically don't need replies
+      const draftAction = executedRule.actionItems.find(
+        (a) => a.type === "DRAFT_EMAIL" && a.draftId,
+      );
+
+      // Thank you emails should NOT create drafts (a DRAFT_EMAIL action without a draftId is tolerated)
+      expect(draftAction).toBeUndefined();
+
+      logStep("Thank you email processed", {
+        hasDraft: false,
+        actionsCount: executedRule.actionItems.length,
+      });
+    },
+    TIMEOUTS.TEST_DEFAULT,
+  );
+
+  test(
+    "should handle question email with draft",
+    async () => {
+      testStartTime = Date.now();
+      const scenario = TEST_EMAIL_SCENARIOS.QUESTION;
+
+      // ========================================
+      // Send question email
+      // ========================================
+      logStep("Sending question email");
+
+      await sendTestEmail({
+        from: gmail,
+        to: outlook,
+        subject: scenario.subject,
+        body: scenario.body,
+      });
+
+      // Wait for delivery. NOTE(review): the match uses only the run-wide prefix — confirm it cannot return another test's email
+      const outlookMessage = await waitForMessageInInbox({
+        provider: outlook.emailProvider,
+        subjectContains: getTestSubjectPrefix(),
+        timeout: TIMEOUTS.EMAIL_DELIVERY,
+      });
+
+      // ========================================
+      // Wait for rule execution
+      // ========================================
+      logStep("Waiting for rule execution");
+
+      const executedRule = await waitForExecutedRule({
+        messageId: outlookMessage.messageId,
+        emailAccountId: outlook.id,
+        timeout: TIMEOUTS.WEBHOOK_PROCESSING,
+      });
+
+      expect(executedRule).toBeDefined();
+
+      // ========================================
+      // Verify draft created for question
+      // ========================================
+      logStep("Verifying question email processing");
+
+      const draftAction = executedRule.actionItems.find(
+        (a) => a.type === "DRAFT_EMAIL",
+      );
+
+      // Only the DRAFT_EMAIL action is required here; its draftId is logged below but, unlike the needs-reply test, not asserted
+      expect(draftAction).toBeDefined();
+
+      logStep("Question email processed", {
+        hasDraft: !!draftAction?.draftId,
+        actionsCount: executedRule.actionItems.length,
+      });
+    },
+    TIMEOUTS.TEST_DEFAULT,
+  );
+});
diff --git a/apps/web/__tests__/e2e/flows/config.ts b/apps/web/__tests__/e2e/flows/config.ts
new file mode 100644
index 0000000000..4a216c3425
--- /dev/null
+++ b/apps/web/__tests__/e2e/flows/config.ts
@@ -0,0 +1,73 @@
+/**
+ * Configuration for E2E flow tests
+ *
+ * Environment variables:
+ * - E2E_GMAIL_EMAIL: Gmail test account email
+ * - E2E_OUTLOOK_EMAIL: Outlook test account email
+ * - E2E_RUN_ID: Unique run identifier (auto-generated if not set)
+ * - E2E_WEBHOOK_URL: Tunnel URL for webhook delivery
+ * - E2E_AI_MODEL: AI model to use (defaults to gpt-4o-mini for cost)
+ */
+
+// Test account addresses; undefined when the variable is not set (validateConfig reports that)
+export const E2E_GMAIL_EMAIL = process.env.E2E_GMAIL_EMAIL;
+export const E2E_OUTLOOK_EMAIL = process.env.E2E_OUTLOOK_EMAIL;
+
+// Run ID: E2E_RUN_ID when set and non-empty, otherwise "<epoch ms>-<up to 6 base-36 chars>"
+export const E2E_RUN_ID =
+  process.env.E2E_RUN_ID ||
+  `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
+
+// Webhook tunnel URL; undefined unless E2E_WEBHOOK_URL is provided by the environment
+export const E2E_WEBHOOK_URL = process.env.E2E_WEBHOOK_URL;
+
+// AI model for tests; falls back to gpt-4o-mini when E2E_AI_MODEL is unset or empty
+export const E2E_AI_MODEL = process.env.E2E_AI_MODEL || "gpt-4o-mini";
+
+// Timeouts (values are in milliseconds)
+export const TIMEOUTS = {
+  /** How long to wait for webhook processing to complete */
+  WEBHOOK_PROCESSING: 60_000,
+  /** How long to wait for email delivery between accounts */
+  EMAIL_DELIVERY: 90_000,
+  /** Polling interval when waiting for state changes */
+  POLL_INTERVAL: 3000,
+  /** Default test timeout */
+  TEST_DEFAULT: 120_000,
+  /** Timeout for full reply cycle tests */
+  FULL_CYCLE: 300_000,
+} as const;
+
+// Subject tag identifying emails that belong to this test run
+export function getTestSubjectPrefix(): string {
+  return ["[E2E-", E2E_RUN_ID, "]"].join("");
+}
+
+// Flow suites are opt-in: they run only when RUN_E2E_FLOW_TESTS or
+// RUN_E2E_TESTS is set to exactly "true"
+export function shouldRunFlowTests(): boolean {
+  const flags = [process.env.RUN_E2E_FLOW_TESTS, process.env.RUN_E2E_TESTS];
+
+  return flags.some((flag) => flag === "true");
+}
+
+// Report which required test-account variables are missing
+export function validateConfig(): {
+  valid: boolean;
+  errors: string[];
+} {
+  const required: [string, string | undefined][] = [
+    ["E2E_GMAIL_EMAIL", E2E_GMAIL_EMAIL],
+    ["E2E_OUTLOOK_EMAIL", E2E_OUTLOOK_EMAIL],
+  ];
+
+  // Unset or empty values count as missing; errors keep the order above
+  const errors = required
+    .filter(([, value]) => !value)
+    .map(([name]) => `${name} environment variable is required`);
+
+  return {
+    valid: errors.length === 0,
+    errors,
+  };
+}
diff --git a/apps/web/__tests__/e2e/flows/draft-cleanup.test.ts b/apps/web/__tests__/e2e/flows/draft-cleanup.test.ts
new file mode 100644
index 0000000000..532b8c5a69
--- /dev/null
+++ b/apps/web/__tests__/e2e/flows/draft-cleanup.test.ts
@@ -0,0 +1,318 @@
+/**
+ * E2E Flow Test: Draft Cleanup
+ *
+ * Tests that AI-generated drafts are properly cleaned up:
+ * - When user sends their own reply (not the AI draft)
+ * - When user sends the AI draft
+ * - DraftSendLog is properly recorded
+ *
+ * Usage:
+ * RUN_E2E_FLOW_TESTS=true pnpm test-e2e draft-cleanup
+ */
+
+import { describe, test, expect, beforeAll, afterEach } from "vitest";
+import { shouldRunFlowTests, TIMEOUTS, getTestSubjectPrefix } from "./config";
+import { initializeFlowTests, setupFlowTest } from "./setup";
+import { generateTestSummary } from "./teardown";
+import {
+  sendTestEmail,
+  sendTestReply,
+  TEST_EMAIL_SCENARIOS,
+} from "./helpers/email";
+import {
+  waitForExecutedRule,
+  waitForMessageInInbox,
+  waitForDraftDeleted,
+  waitForDraftSendLog,
+} from "./helpers/polling";
+import { logStep, clearLogs } from "./helpers/logging";
+import type { TestAccount } from "./helpers/accounts";
+
+describe.skipIf(!shouldRunFlowTests())("Draft Cleanup", () => {
+  let gmail: TestAccount;
+  let outlook: TestAccount;
+  let testStartTime: number;
+
+  // Once per suite: run initializeFlowTests(), then keep the Gmail and Outlook
+  // accounts returned by setupFlowTest() for the tests below.
+  beforeAll(async () => {
+    await initializeFlowTests();
+    ({ gmail, outlook } = await setupFlowTest());
+  }, TIMEOUTS.TEST_DEFAULT);
+
+  // After each test: hand the suite name and that test's start time to
+  // generateTestSummary(), then clear the collected logs.
+  afterEach(async () => {
+    generateTestSummary("Draft Cleanup", testStartTime);
+    clearLogs();
+  });
+
+  // Scenario: Gmail sends an email to Outlook, a rule produces a DRAFT_EMAIL
+  // action with a draft ID, and the Outlook user then replies with their own
+  // text. The test expects the AI draft to be deleted and a DraftSendLog with a
+  // similarity score below 0.9.
+  test(
+    "should delete AI draft when user sends manual reply",
+    async () => {
+      // Read by the afterEach hook when it generates the test summary
+      testStartTime = Date.now();
+      const scenario = TEST_EMAIL_SCENARIOS.NEEDS_REPLY;
+
+      // ========================================
+      // Step 1: Send email that triggers draft creation
+      // ========================================
+      logStep("Step 1: Sending email that needs reply");
+
+      await sendTestEmail({
+        from: gmail,
+        to: outlook,
+        subject: scenario.subject,
+        body: scenario.body,
+      });
+
+      // NOTE(review): this matches on the run-wide subject prefix only, and
+      // every prefixed email sent in this run carries the same prefix. Confirm
+      // the helper cannot return a message sent by a different test.
+      const receivedMessage = await waitForMessageInInbox({
+        provider: outlook.emailProvider,
+        subjectContains: getTestSubjectPrefix(),
+        timeout: TIMEOUTS.EMAIL_DELIVERY,
+      });
+
+      // ========================================
+      // Step 2: Wait for AI draft to be created
+      // ========================================
+      logStep("Step 2: Waiting for AI draft creation");
+
+      const executedRule = await waitForExecutedRule({
+        messageId: receivedMessage.messageId,
+        emailAccountId: outlook.id,
+        timeout: TIMEOUTS.WEBHOOK_PROCESSING,
+      });
+
+      // Only a DRAFT_EMAIL action that actually carries a draft ID counts
+      const draftAction = executedRule.actionItems.find(
+        (a) => a.type === "DRAFT_EMAIL" && a.draftId,
+      );
+
+      expect(draftAction).toBeDefined();
+      expect(draftAction?.draftId).toBeTruthy();
+      // Safe to use ! after the assertions above
+      const aiDraftId = draftAction!.draftId!;
+
+      logStep("AI draft created", { draftId: aiDraftId });
+
+      // Verify draft exists
+      const aiDraft = await outlook.emailProvider.getDraft(aiDraftId);
+      expect(aiDraft).toBeDefined();
+
+      // ========================================
+      // Step 3: User sends their own reply (NOT the AI draft)
+      // ========================================
+      logStep("Step 3: User sends manual reply (not the AI draft)");
+
+      // Send a different reply than the AI draft
+      const manualReply = await sendTestReply({
+        from: outlook,
+        to: gmail,
+        threadId: receivedMessage.threadId,
+        originalMessageId: receivedMessage.messageId,
+        body: "This is my own manually written response, not the AI draft.",
+      });
+
+      logStep("Manual reply sent", {
+        messageId: manualReply.messageId,
+        threadId: manualReply.threadId,
+      });
+
+      // ========================================
+      // Step 4: Verify AI draft is deleted
+      // ========================================
+      logStep("Step 4: Verifying AI draft is deleted");
+
+      await waitForDraftDeleted({
+        draftId: aiDraftId,
+        provider: outlook.emailProvider,
+        timeout: TIMEOUTS.WEBHOOK_PROCESSING,
+      });
+
+      logStep("AI draft successfully deleted");
+
+      // ========================================
+      // Step 5: Verify DraftSendLog records the event
+      // ========================================
+      logStep("Step 5: Verifying DraftSendLog");
+
+      const draftSendLog = await waitForDraftSendLog({
+        threadId: receivedMessage.threadId,
+        emailAccountId: outlook.id,
+        timeout: TIMEOUTS.WEBHOOK_PROCESSING,
+      });
+
+      expect(draftSendLog).toBeDefined();
+
+      // When user sends a different reply (not the AI draft), similarity score should be low
+      expect(draftSendLog.similarityScore).toBeLessThan(0.9);
+
+      logStep("DraftSendLog recorded", {
+        similarityScore: draftSendLog.similarityScore,
+        wasSentFromDraft: draftSendLog.wasSentFromDraft,
+      });
+    },
+    // The whole send/receive/process round trip gets the long timeout
+    TIMEOUTS.FULL_CYCLE,
+  );
+
+  // Scenario: an incoming email produces an AI draft, the user replies on the
+  // thread, and afterwards no draft may remain for that thread.
+  //
+  // The inbox wait matches on the full subject of the email sent here. The
+  // run-wide prefix alone is also present on emails sent by the other tests in
+  // this file, so matching on it could pick up one of their messages.
+  //
+  // NOTE(review): despite the title, only one incoming email (and so one draft)
+  // is created here; the "multiple drafts" aspect is covered only by the final
+  // check that no draft is left on the thread.
+  test(
+    "should handle multiple drafts in same thread",
+    async () => {
+      // Read by the afterEach hook when it generates the test summary
+      testStartTime = Date.now();
+
+      // ========================================
+      // Setup: Create thread with an incoming email
+      // ========================================
+      logStep("Setting up thread with multiple messages");
+
+      const sentEmail = await sendTestEmail({
+        from: gmail,
+        to: outlook,
+        subject: "Multi-draft cleanup test",
+        body: "First question: What is the project timeline?",
+      });
+
+      // Match this test's own email rather than any email from this run
+      const firstReceived = await waitForMessageInInbox({
+        provider: outlook.emailProvider,
+        subjectContains: sentEmail.fullSubject,
+        timeout: TIMEOUTS.EMAIL_DELIVERY,
+      });
+
+      // Wait for first draft
+      const firstRule = await waitForExecutedRule({
+        messageId: firstReceived.messageId,
+        emailAccountId: outlook.id,
+        timeout: TIMEOUTS.WEBHOOK_PROCESSING,
+      });
+
+      // Only a DRAFT_EMAIL action that actually carries a draft ID counts
+      const firstDraftAction = firstRule.actionItems.find(
+        (a) => a.type === "DRAFT_EMAIL" && a.draftId,
+      );
+
+      // Assert draft was created - this test requires a draft
+      expect(firstDraftAction?.draftId).toBeTruthy();
+      const firstDraftId = firstDraftAction!.draftId!;
+      logStep("First draft created", { draftId: firstDraftId });
+
+      // ========================================
+      // User sends reply
+      // ========================================
+      logStep("User sends reply");
+
+      await sendTestReply({
+        from: outlook,
+        to: gmail,
+        threadId: firstReceived.threadId,
+        originalMessageId: firstReceived.messageId,
+        body: "Here is my response covering all your questions.",
+      });
+
+      // ========================================
+      // Verify all drafts for thread are cleaned up
+      // ========================================
+      logStep("Verifying all thread drafts cleaned up");
+
+      await waitForDraftDeleted({
+        draftId: firstDraftId,
+        provider: outlook.emailProvider,
+        timeout: TIMEOUTS.WEBHOOK_PROCESSING,
+      });
+      logStep("First draft deleted");
+
+      // Check for any remaining drafts for this thread
+      // (only the first 50 drafts returned by the provider are inspected)
+      const drafts = await outlook.emailProvider.getDrafts({ maxResults: 50 });
+      const threadDrafts = drafts.filter(
+        (d) => d.threadId === firstReceived.threadId,
+      );
+
+      // Log before asserting so the count is visible when the assertion fails
+      logStep("Remaining drafts for thread", { count: threadDrafts.length });
+
+      // No drafts should remain after user sends a reply
+      expect(threadDrafts.length).toBe(0);
+    },
+    TIMEOUTS.FULL_CYCLE,
+  );
+
+  // Scenario: an incoming email produces an AI draft, and the user replies with
+  // exactly the draft's plain-text content. The test expects a DraftSendLog
+  // with a similarity score of at least 0.9.
+  //
+  // The inbox wait matches on the full subject of the email sent here. The
+  // run-wide prefix alone is also present on emails sent by the other tests in
+  // this file, so matching on it could pick up one of their messages.
+  test(
+    "should record DraftSendLog when AI draft is sent",
+    async () => {
+      // Read by the afterEach hook when it generates the test summary
+      testStartTime = Date.now();
+      const scenario = TEST_EMAIL_SCENARIOS.QUESTION;
+
+      // ========================================
+      // Send email and wait for draft
+      // ========================================
+      logStep("Sending email and waiting for draft");
+
+      const sentEmail = await sendTestEmail({
+        from: gmail,
+        to: outlook,
+        subject: scenario.subject,
+        body: scenario.body,
+      });
+
+      // Match this test's own email rather than any email from this run
+      const receivedMessage = await waitForMessageInInbox({
+        provider: outlook.emailProvider,
+        subjectContains: sentEmail.fullSubject,
+        timeout: TIMEOUTS.EMAIL_DELIVERY,
+      });
+
+      const executedRule = await waitForExecutedRule({
+        messageId: receivedMessage.messageId,
+        emailAccountId: outlook.id,
+        timeout: TIMEOUTS.WEBHOOK_PROCESSING,
+      });
+
+      // Only a DRAFT_EMAIL action that actually carries a draft ID counts
+      const draftAction = executedRule.actionItems.find(
+        (a) => a.type === "DRAFT_EMAIL" && a.draftId,
+      );
+
+      // This test requires a draft to be created
+      expect(draftAction?.draftId).toBeTruthy();
+
+      const aiDraftId = draftAction!.draftId!;
+      logStep("AI draft created", { draftId: aiDraftId });
+
+      // ========================================
+      // Get draft content and "send" it
+      // ========================================
+      logStep("Fetching and sending AI draft");
+
+      const draft = await outlook.emailProvider.getDraft(aiDraftId);
+      expect(draft).toBeDefined();
+
+      // Send the draft content as a reply
+      // (simulating user clicking send on the draft)
+      // If the draft has no plain text, the placeholder body is sent instead
+      const sentDraft = await sendTestReply({
+        from: outlook,
+        to: gmail,
+        threadId: receivedMessage.threadId,
+        originalMessageId: receivedMessage.messageId,
+        body: draft?.textPlain || "Draft content",
+      });
+
+      logStep("Draft sent", { messageId: sentDraft.messageId });
+
+      // ========================================
+      // Verify DraftSendLog
+      // ========================================
+      logStep("Verifying DraftSendLog records draft was sent");
+
+      const draftSendLog = await waitForDraftSendLog({
+        threadId: receivedMessage.threadId,
+        emailAccountId: outlook.id,
+        timeout: TIMEOUTS.WEBHOOK_PROCESSING,
+      });
+
+      expect(draftSendLog).toBeDefined();
+
+      // When user sends the exact AI draft content, similarity score should be very high
+      expect(draftSendLog.similarityScore).toBeGreaterThanOrEqual(0.9);
+
+      logStep("DraftSendLog recorded", {
+        id: draftSendLog.id,
+        similarityScore: draftSendLog.similarityScore,
+        wasSentFromDraft: draftSendLog.wasSentFromDraft,
+        draftId: draftSendLog.draftId,
+        sentMessageId: draftSendLog.sentMessageId,
+      });
+    },
+    TIMEOUTS.FULL_CYCLE,
+  );
+});
diff --git a/apps/web/__tests__/e2e/flows/full-reply-cycle.test.ts b/apps/web/__tests__/e2e/flows/full-reply-cycle.test.ts
new file mode 100644
index 0000000000..de86f90000
--- /dev/null
+++ b/apps/web/__tests__/e2e/flows/full-reply-cycle.test.ts
@@ -0,0 +1,321 @@
+/**
+ * E2E Flow Test: Full Reply Cycle
+ *
+ * Tests the complete email processing flow:
+ * 1. Gmail sends email to Outlook
+ * 2. Outlook webhook fires
+ * 3. Rule processes and creates draft
+ * 4. Draft is sent as reply
+ * 5. Gmail receives the reply
+ * 6. Outbound handling cleans up drafts
+ *
+ * Usage:
+ * RUN_E2E_FLOW_TESTS=true pnpm test-e2e full-reply-cycle
+ */
+
+import { describe, test, expect, beforeAll, afterAll, afterEach } from "vitest";
+import { shouldRunFlowTests, TIMEOUTS, getTestSubjectPrefix } from "./config";
+import { initializeFlowTests, setupFlowTest } from "./setup";
+import { generateTestSummary } from "./teardown";
+import {
+  sendTestEmail,
+  sendTestReply,
+  TEST_EMAIL_SCENARIOS,
+  assertDraftExists,
+} from "./helpers/email";
+import {
+  waitForExecutedRule,
+  waitForMessageInInbox,
+  waitForDraftDeleted,
+  waitForDraftSendLog,
+} from "./helpers/polling";
+import { logStep, clearLogs } from "./helpers/logging";
+import type { TestAccount } from "./helpers/accounts";
+
+describe.skipIf(!shouldRunFlowTests())("Full Reply Cycle", () => {
+  let gmail: TestAccount;
+  let outlook: TestAccount;
+  let testStartTime: number;
+
+  // Once per suite: run initializeFlowTests(), then keep the Gmail and Outlook
+  // accounts returned by setupFlowTest() for the tests below.
+  beforeAll(async () => {
+    await initializeFlowTests();
+    ({ gmail, outlook } = await setupFlowTest());
+  }, TIMEOUTS.TEST_DEFAULT);
+
+  afterAll(async () => {
+    // Deliberately empty: teardownFlowTests() is not called here so that
+    // webhook subscriptions stay active for subsequent runs
+  });
+
+  // After each test: hand the suite name and that test's start time to
+  // generateTestSummary(), then clear the collected logs.
+  afterEach(async () => {
+    generateTestSummary("Full Reply Cycle", testStartTime);
+    clearLogs();
+  });
+
+  // End-to-end happy path, in nine ordered steps: send from Gmail, receive in
+  // Outlook, rule execution, draft creation, optional label check, reply sent
+  // from Outlook, reply received in Gmail, DraftSendLog recorded, draft deleted.
+  // Each step consumes values produced by earlier steps, so order matters.
+  test(
+    "Gmail sends to Outlook, rule creates draft, user sends reply, Gmail receives",
+    async () => {
+      // Read by the afterEach hook when it generates the test summary
+      testStartTime = Date.now();
+      const scenario = TEST_EMAIL_SCENARIOS.NEEDS_REPLY;
+
+      // ========================================
+      // Step 1: Gmail sends email to Outlook
+      // ========================================
+      logStep("Step 1: Sending email from Gmail to Outlook");
+
+      const sentEmail = await sendTestEmail({
+        from: gmail,
+        to: outlook,
+        subject: scenario.subject,
+        body: scenario.body,
+      });
+
+      logStep("Email sent", {
+        messageId: sentEmail.messageId,
+        threadId: sentEmail.threadId,
+        subject: sentEmail.fullSubject,
+      });
+
+      // ========================================
+      // Step 2: Wait for Outlook to receive and process
+      // ========================================
+      logStep("Step 2: Waiting for Outlook to receive email");
+
+      // Wait for message to appear in Outlook inbox
+      // NOTE(review): this matches on the run-wide subject prefix only, and
+      // every prefixed email sent in this run carries the same prefix. Confirm
+      // the helper cannot return a message sent by a different test.
+      const outlookMessage = await waitForMessageInInbox({
+        provider: outlook.emailProvider,
+        subjectContains: getTestSubjectPrefix(),
+        timeout: TIMEOUTS.EMAIL_DELIVERY,
+      });
+
+      logStep("Email received in Outlook", {
+        messageId: outlookMessage.messageId,
+        threadId: outlookMessage.threadId,
+      });
+
+      // ========================================
+      // Step 3: Wait for rule execution
+      // ========================================
+      logStep("Step 3: Waiting for rule execution");
+
+      const executedRule = await waitForExecutedRule({
+        messageId: outlookMessage.messageId,
+        emailAccountId: outlook.id,
+        timeout: TIMEOUTS.WEBHOOK_PROCESSING,
+      });
+
+      expect(executedRule).toBeDefined();
+      expect(executedRule.status).toBe("APPLIED");
+
+      logStep("Rule executed", {
+        ruleId: executedRule.ruleId,
+        status: executedRule.status,
+        actionItems: executedRule.actionItems.length,
+      });
+
+      // ========================================
+      // Step 4: Verify draft was created
+      // ========================================
+      logStep("Step 4: Verifying draft creation");
+
+      // Only a DRAFT_EMAIL action that actually carries a draft ID counts
+      const draftAction = executedRule.actionItems.find(
+        (a) => a.type === "DRAFT_EMAIL" && a.draftId,
+      );
+
+      expect(draftAction).toBeDefined();
+      expect(draftAction?.draftId).toBeTruthy();
+
+      // Verify draft exists in Outlook
+      // NOTE(review): the lookup is by thread ID, and the draft ID used in the
+      // following steps is the one returned here, not draftAction.draftId.
+      // Confirm the two are expected to be the same draft.
+      const draftInfo = await assertDraftExists({
+        provider: outlook.emailProvider,
+        threadId: outlookMessage.threadId,
+      });
+
+      logStep("Draft created", {
+        draftId: draftInfo.draftId,
+        contentPreview: draftInfo.content?.substring(0, 100),
+      });
+
+      // ========================================
+      // Step 5: Check that appropriate label was applied
+      // ========================================
+      logStep("Step 5: Verifying label applied");
+
+      // Check if any of the expected labels were applied
+      const labelAction = executedRule.actionItems.find(
+        (a) => a.type === "LABEL" && a.labelId,
+      );
+
+      // The label check is conditional: when the rule produced no LABEL action
+      // with a label ID, this step asserts nothing.
+      if (labelAction?.labelId) {
+        const message = await outlook.emailProvider.getMessage(
+          outlookMessage.messageId,
+        );
+        expect(message.labelIds).toBeDefined();
+        expect(message.labelIds).toContain(labelAction.labelId);
+        logStep("Labels on message", { labels: message.labelIds });
+      }
+
+      // ========================================
+      // Step 6: Send the draft reply
+      // ========================================
+      logStep("Step 6: Sending draft reply from Outlook");
+
+      // Get the draft content
+      const draft = await outlook.emailProvider.getDraft(draftInfo.draftId);
+      expect(draft).toBeDefined();
+
+      // Send a reply (simulating user sending the draft)
+      // The draft's plain text is sent as a new reply via sendTestReply; if the
+      // draft has no plain text, the fixed fallback sentence is sent instead.
+      const replyResult = await sendTestReply({
+        from: outlook,
+        to: gmail,
+        threadId: outlookMessage.threadId,
+        originalMessageId: outlookMessage.messageId,
+        body:
+          draft?.textPlain ||
+          "Thank you for your email. Here is the information you requested.",
+      });
+
+      logStep("Reply sent from Outlook", {
+        messageId: replyResult.messageId,
+        threadId: replyResult.threadId,
+      });
+
+      // ========================================
+      // Step 7: Verify Gmail receives the reply
+      // ========================================
+      logStep("Step 7: Waiting for Gmail to receive reply");
+
+      // The run prefix is stripped, so the match is on the bare scenario
+      // subject. NOTE(review): that subject is not unique to this run; confirm
+      // a reply left over from an earlier run cannot be matched here.
+      const gmailReply = await waitForMessageInInbox({
+        provider: gmail.emailProvider,
+        subjectContains: sentEmail.fullSubject
+          .replace(getTestSubjectPrefix(), "")
+          .trim(),
+        timeout: TIMEOUTS.EMAIL_DELIVERY,
+      });
+
+      logStep("Reply received in Gmail", {
+        messageId: gmailReply.messageId,
+        threadId: gmailReply.threadId,
+      });
+
+      // Verify it's in the same thread
+      expect(gmailReply.threadId).toBe(sentEmail.threadId);
+
+      // ========================================
+      // Step 8: Verify outbound handling
+      // ========================================
+      logStep("Step 8: Verifying outbound handling");
+
+      // Wait for DraftSendLog to be recorded
+      const draftSendLog = await waitForDraftSendLog({
+        threadId: outlookMessage.threadId,
+        emailAccountId: outlook.id,
+        timeout: TIMEOUTS.WEBHOOK_PROCESSING,
+      });
+
+      expect(draftSendLog).toBeDefined();
+      logStep("DraftSendLog recorded", {
+        id: draftSendLog.id,
+        wasSentFromDraft: draftSendLog.wasSentFromDraft,
+      });
+
+      // ========================================
+      // Step 9: Verify draft cleanup
+      // ========================================
+      logStep("Step 9: Verifying draft cleanup");
+
+      // The AI draft should have been deleted since user sent their own reply
+      // or used the draft
+      await waitForDraftDeleted({
+        draftId: draftInfo.draftId,
+        provider: outlook.emailProvider,
+        timeout: TIMEOUTS.WEBHOOK_PROCESSING,
+      });
+
+      logStep("Draft cleanup verified - draft deleted");
+
+      // ========================================
+      // Test Complete
+      // ========================================
+      logStep("=== Full Reply Cycle Test PASSED ===");
+    },
+    // The whole round trip gets the long timeout
+    TIMEOUTS.FULL_CYCLE,
+  );
+
+  // Scenario: a three-message exchange (Gmail -> Outlook -> Gmail -> Outlook)
+  // must stay in one thread on each side.
+  //
+  // The first Outlook wait matches on the full subject of the email sent here.
+  // The run-wide prefix alone is also present on the email sent by the previous
+  // test in this file, so matching on it could pick up that message.
+  test(
+    "should verify thread continuity across providers",
+    async () => {
+      // Read by the afterEach hook when it generates the test summary
+      testStartTime = Date.now();
+
+      // ========================================
+      // Send initial email
+      // ========================================
+      logStep("Sending initial email from Gmail to Outlook");
+
+      const initialEmail = await sendTestEmail({
+        from: gmail,
+        to: outlook,
+        subject: "Thread continuity test",
+        body: "This is the first message in the thread.",
+      });
+
+      // Wait for Outlook to receive this test's own email
+      const outlookMsg1 = await waitForMessageInInbox({
+        provider: outlook.emailProvider,
+        subjectContains: initialEmail.fullSubject,
+        timeout: TIMEOUTS.EMAIL_DELIVERY,
+      });
+
+      // ========================================
+      // Send reply from Outlook
+      // ========================================
+      logStep("Sending reply from Outlook to Gmail");
+
+      await sendTestReply({
+        from: outlook,
+        to: gmail,
+        threadId: outlookMsg1.threadId,
+        originalMessageId: outlookMsg1.messageId,
+        body: "This is the reply from Outlook.",
+      });
+
+      // Wait for Gmail to receive
+      // NOTE(review): the bare subject is not unique to this run; confirm a
+      // reply left over from an earlier run cannot be matched here.
+      const gmailReply = await waitForMessageInInbox({
+        provider: gmail.emailProvider,
+        subjectContains: "Thread continuity test",
+        timeout: TIMEOUTS.EMAIL_DELIVERY,
+      });
+
+      // Verify same thread on Gmail side
+      expect(gmailReply.threadId).toBe(initialEmail.threadId);
+
+      // ========================================
+      // Send another reply from Gmail
+      // ========================================
+      logStep("Sending second reply from Gmail to Outlook");
+
+      await sendTestReply({
+        from: gmail,
+        to: outlook,
+        threadId: gmailReply.threadId,
+        originalMessageId: gmailReply.messageId,
+        body: "This is the second reply from Gmail.",
+      });
+
+      // Wait for Outlook to receive
+      // NOTE(review): the first message of this thread is already in the
+      // Outlook inbox and also contains this subject. If the helper can return
+      // it, the assertion below compares outlookMsg1 with itself and proves
+      // nothing about the second reply. Confirm against the helper.
+      const outlookMsg2 = await waitForMessageInInbox({
+        provider: outlook.emailProvider,
+        subjectContains: "Thread continuity test",
+        timeout: TIMEOUTS.EMAIL_DELIVERY,
+      });
+
+      // Verify same thread on Outlook side
+      expect(outlookMsg2.threadId).toBe(outlookMsg1.threadId);
+
+      logStep("Thread continuity verified across 3 messages");
+    },
+    TIMEOUTS.FULL_CYCLE,
+  );
+});
diff --git a/apps/web/__tests__/e2e/flows/helpers/accounts.ts b/apps/web/__tests__/e2e/flows/helpers/accounts.ts
new file mode 100644
index 0000000000..6943ecfa00
--- /dev/null
+++ b/apps/web/__tests__/e2e/flows/helpers/accounts.ts
@@ -0,0 +1,239 @@
+/**
+ * Test account management for E2E flow tests
+ *
+ * Loads test accounts from the database and provides
+ * helper functions for account operations.
+ */
+
+import prisma from "@/utils/prisma";
+import { createEmailProvider } from "@/utils/email/provider";
+import type { EmailProvider } from "@/utils/email/types";
+import { createScopedLogger } from "@/utils/logger";
+import { E2E_GMAIL_EMAIL, E2E_OUTLOOK_EMAIL } from "../config";
+import { logStep } from "./logging";
+
+// Logger for email provider operations
+// (handed to createEmailProvider when an account's provider is built)
+const testLogger = createScopedLogger("e2e-test");
+
+/**
+ * A test mailbox loaded from the database, bundled with an email provider
+ * created for it.
+ */
+export interface TestAccount {
+  /** ID of the emailAccount database record */
+  id: string;
+  /** Email address stored on the emailAccount record */
+  email: string;
+  /** ID of the user that owns the emailAccount record */
+  userId: string;
+  /** Which provider the account belongs to */
+  provider: "google" | "microsoft";
+  /** Provider instance created for this account via createEmailProvider */
+  emailProvider: EmailProvider;
+}
+
+// Module-level cache: each account is loaded at most once and then reused
+// until clearAccountCache() resets these to null.
+let gmailAccount: TestAccount | null = null;
+let outlookAccount: TestAccount | null = null;
+
+/**
+ * Returns the Gmail test account, loading it on first use.
+ *
+ * The first call looks up the emailAccount record whose email equals
+ * E2E_GMAIL_EMAIL and whose linked account has provider "google", creates an
+ * email provider for it and caches the result; later calls return the cache.
+ *
+ * Throws when E2E_GMAIL_EMAIL is unset or no matching record exists.
+ */
+export async function getGmailTestAccount(): Promise<TestAccount> {
+  if (gmailAccount) return gmailAccount;
+
+  const address = E2E_GMAIL_EMAIL;
+  if (!address) {
+    throw new Error("E2E_GMAIL_EMAIL environment variable is not set");
+  }
+
+  logStep("Loading Gmail test account", { email: address });
+
+  const record = await prisma.emailAccount.findFirst({
+    where: { email: address, account: { provider: "google" } },
+    include: { account: true },
+  });
+
+  if (!record) {
+    const notFound =
+      `No Gmail account found for ${address}. ` +
+      "Make sure the account is logged in and stored in the test database.";
+    throw new Error(notFound);
+  }
+
+  const { id, email, userId } = record;
+
+  const emailProvider = await createEmailProvider({
+    emailAccountId: id,
+    provider: "google",
+    logger: testLogger,
+  });
+
+  const loaded: TestAccount = {
+    id,
+    email,
+    userId,
+    provider: "google",
+    emailProvider,
+  };
+  // Cache for subsequent calls
+  gmailAccount = loaded;
+
+  logStep("Gmail test account loaded", { id, email });
+
+  return loaded;
+}
+
+/**
+ * Returns the Outlook test account, loading it on first use.
+ *
+ * The first call looks up the emailAccount record whose email equals
+ * E2E_OUTLOOK_EMAIL and whose linked account has provider "microsoft", creates
+ * an email provider for it and caches the result; later calls return the cache.
+ *
+ * Throws when E2E_OUTLOOK_EMAIL is unset or no matching record exists.
+ */
+export async function getOutlookTestAccount(): Promise<TestAccount> {
+  if (outlookAccount) return outlookAccount;
+
+  const address = E2E_OUTLOOK_EMAIL;
+  if (!address) {
+    throw new Error("E2E_OUTLOOK_EMAIL environment variable is not set");
+  }
+
+  logStep("Loading Outlook test account", { email: address });
+
+  const record = await prisma.emailAccount.findFirst({
+    where: { email: address, account: { provider: "microsoft" } },
+    include: { account: true },
+  });
+
+  if (!record) {
+    const notFound =
+      `No Outlook account found for ${address}. ` +
+      "Make sure the account is logged in and stored in the test database.";
+    throw new Error(notFound);
+  }
+
+  const { id, email, userId } = record;
+
+  const emailProvider = await createEmailProvider({
+    emailAccountId: id,
+    provider: "microsoft",
+    logger: testLogger,
+  });
+
+  const loaded: TestAccount = {
+    id,
+    email,
+    userId,
+    provider: "microsoft",
+    emailProvider,
+  };
+  // Cache for subsequent calls
+  outlookAccount = loaded;
+
+  logStep("Outlook test account loaded", { id, email });
+
+  return loaded;
+}
+
+/**
+ * Loads the Gmail and Outlook test accounts concurrently and returns both.
+ * Rejects if either account fails to load.
+ */
+export async function getTestAccounts(): Promise<{
+  gmail: TestAccount;
+  outlook: TestAccount;
+}> {
+  const loaded = await Promise.all([
+    getGmailTestAccount(),
+    getOutlookTestAccount(),
+  ]);
+  return { gmail: loaded[0], outlook: loaded[1] };
+}
+
+/**
+ * Puts the given user on an active BUSINESS_MONTHLY premium record.
+ *
+ * Always clears the user's aiApiKey first so env defaults are used. Then
+ * updates the user's existing premium record, or creates a new one and links
+ * it to the user when there is none.
+ *
+ * Rejects when no user with this ID exists (findUniqueOrThrow).
+ */
+export async function ensureTestPremium(userId: string): Promise<void> {
+  logStep("Ensuring premium status", { userId });
+
+  const user = await prisma.user.findUniqueOrThrow({
+    where: { id: userId },
+    include: { premium: true },
+  });
+  const whereUser = { id: user.id };
+
+  // Clear any existing aiApiKey to use env defaults
+  await prisma.user.update({ where: whereUser, data: { aiApiKey: null } });
+
+  if (user.premium) {
+    // Existing premium record: force it to the active business tier
+    await prisma.premium.update({
+      where: { id: user.premium.id },
+      data: {
+        stripeSubscriptionStatus: "active",
+        tier: "BUSINESS_MONTHLY",
+      },
+    });
+  } else {
+    // No premium record yet: create one and attach it to the user
+    const { id: premiumId } = await prisma.premium.create({
+      data: {
+        tier: "BUSINESS_MONTHLY",
+        stripeSubscriptionStatus: "active",
+      },
+    });
+
+    await prisma.user.update({ where: whereUser, data: { premiumId } });
+  }
+
+  logStep("Premium status ensured");
+}
+
+/**
+ * Makes sure the email account has at least one enabled rule.
+ *
+ * When enabled rules already exist nothing is changed. Otherwise a single
+ * "AI Auto-Reply" rule with one DRAFT_EMAIL action is created.
+ */
+export async function ensureTestRules(emailAccountId: string): Promise<void> {
+  logStep("Ensuring test rules exist", { emailAccountId });
+
+  const enabledRules = await prisma.rule.findMany({
+    where: { emailAccountId, enabled: true },
+  });
+  const enabledCount = enabledRules.length;
+
+  if (enabledCount === 0) {
+    // Create a default rule that uses AI to draft replies
+    logStep("Creating default test rule");
+
+    const instructions =
+      "If this email requires a response, draft a helpful reply. " +
+      "If it's just informational (FYI, newsletter, notification), do nothing.";
+
+    await prisma.rule.create({
+      data: {
+        name: "AI Auto-Reply",
+        emailAccountId,
+        enabled: true,
+        runOnThreads: false,
+        instructions,
+        actions: { create: { type: "DRAFT_EMAIL" } },
+      },
+    });
+
+    logStep("Default test rule created");
+    return;
+  }
+
+  logStep("Rules already exist", { count: enabledCount });
+}
+
+/**
+ * Forgets both cached test accounts so the next lookup reloads them
+ * (useful for test isolation).
+ */
+export function clearAccountCache(): void {
+  [gmailAccount, outlookAccount] = [null, null];
+}
diff --git a/apps/web/__tests__/e2e/flows/helpers/email.ts b/apps/web/__tests__/e2e/flows/helpers/email.ts
new file mode 100644
index 0000000000..dbb023b22c
--- /dev/null
+++ b/apps/web/__tests__/e2e/flows/helpers/email.ts
@@ -0,0 +1,299 @@
+/**
+ * Email sending and assertion helpers for E2E flow tests
+ */
+
+import type { EmailProvider } from "@/utils/email/types";
+import type { TestAccount } from "./accounts";
+import { getTestSubjectPrefix } from "../config";
+import { logStep, logAssertion } from "./logging";
+
+/** Input for sendTestEmail. */
+interface SendTestEmailOptions {
+  /** Account whose email provider sends the message */
+  from: TestAccount;
+  /** Account whose address the message is sent to */
+  to: TestAccount;
+  /** Subject text, before any run prefix is added */
+  subject: string;
+  /** Body text; the send helpers wrap it in a single <p> element */
+  body: string;
+  /** Whether to include E2E run ID prefix in subject */
+  includePrefix?: boolean;
+}
+
+/** What the send helpers report back about a sent message. */
+interface SendTestEmailResult {
+  /** Message ID returned by the sending provider */
+  messageId: string;
+  /** Thread ID returned by the sending provider */
+  threadId: string;
+  /**
+   * For sendTestEmail: the subject actually sent, including any run prefix.
+   * For sendTestReply: the subject of the message being replied to.
+   */
+  fullSubject: string;
+}
+
+/**
+ * Sends an email from one test account to the other.
+ *
+ * Unless `includePrefix` is false, the subject is prefixed with the run tag
+ * from getTestSubjectPrefix() and a space. The body is wrapped in a single
+ * <p> element. Returns the provider's message and thread IDs together with
+ * the subject that was actually sent.
+ */
+export async function sendTestEmail(
+  options: SendTestEmailOptions,
+): Promise<SendTestEmailResult> {
+  const { from, to, subject, body } = options;
+  // Prefixing is on unless the caller passes a value other than undefined
+  const includePrefix =
+    options.includePrefix === undefined ? true : options.includePrefix;
+
+  let fullSubject = subject;
+  if (includePrefix) {
+    fullSubject = `${getTestSubjectPrefix()} ${subject}`;
+  }
+
+  logStep("Sending test email", {
+    from: from.email,
+    to: to.email,
+    subject: fullSubject,
+  });
+
+  const sent = await from.emailProvider.sendEmailWithHtml({
+    to: to.email,
+    subject: fullSubject,
+    messageHtml: `<p>${body}</p>`,
+  });
+  const { messageId, threadId } = sent;
+
+  logStep("Email sent", { messageId, threadId });
+
+  return { messageId, threadId, fullSubject };
+}
+
+/**
+ * Send a reply to an existing thread
+ *
+ * Fetches the original message through the sender's provider to obtain its
+ * subject and threading headers, then sends an HTML reply on `threadId`.
+ *
+ * The reply subject gets a "Re: " prefix unless the original subject already
+ * starts with one in any letter case (e.g. "RE:"). A missing original subject
+ * is treated as empty, so the literal text "undefined" is never sent.
+ *
+ * @param options.from - Account that sends the reply
+ * @param options.to - Account the reply is addressed to
+ * @param options.threadId - Thread to reply on, as known to the sender
+ * @param options.originalMessageId - Message being replied to, as known to the sender
+ * @param options.body - Reply text; wrapped in a single <p> element as-is
+ * @returns IDs of the sent reply; `fullSubject` is the ORIGINAL message's
+ *   subject ("" when it has none), not the reply subject
+ */
+export async function sendTestReply(options: {
+  from: TestAccount;
+  to: TestAccount;
+  threadId: string;
+  originalMessageId: string;
+  body: string;
+}): Promise<SendTestEmailResult> {
+  const { from, to, threadId, originalMessageId, body } = options;
+
+  logStep("Sending test reply", {
+    from: from.email,
+    to: to.email,
+    threadId,
+  });
+
+  // Get original message for reply headers
+  const originalMessage =
+    await from.emailProvider.getMessage(originalMessageId);
+
+  // Normalise a missing subject up front so it cannot be interpolated as
+  // "undefined" below
+  const originalSubject = originalMessage.subject || "";
+
+  // Case-insensitive check: avoids producing "Re: RE: ..." for subjects that
+  // already carry an upper-case reply prefix
+  const replySubject = /^re:/i.test(originalSubject)
+    ? originalSubject
+    : `Re: ${originalSubject}`.trim();
+
+  const result = await from.emailProvider.sendEmailWithHtml({
+    to: to.email,
+    subject: replySubject,
+    messageHtml: `<p>${body}</p>`,
+    replyToEmail: {
+      threadId,
+      headerMessageId: originalMessage.headers["message-id"] || "",
+      references: originalMessage.headers.references,
+    },
+  });
+
+  logStep("Reply sent", {
+    messageId: result.messageId,
+    threadId: result.threadId,
+  });
+
+  return {
+    messageId: result.messageId,
+    threadId: result.threadId,
+    fullSubject: originalSubject,
+  };
+}
+
+/**
+ * Checks that a message carries every expected label/category.
+ *
+ * Labels are compared case-insensitively against the message's labelIds. Each
+ * expected label is reported through logAssertion; the first missing one
+ * makes the function throw.
+ */
+export async function assertEmailLabeled(options: {
+  provider: EmailProvider;
+  messageId: string;
+  expectedLabels: string[];
+}): Promise<void> {
+  const { provider, messageId, expectedLabels } = options;
+
+  logStep("Checking email labels", { messageId, expectedLabels });
+
+  const { labelIds } = await provider.getMessage(messageId);
+  const actualLabels = labelIds || [];
+  const actualLowercased = actualLabels.map((label) => label.toLowerCase());
+  const actualListing = actualLabels.join(", ");
+
+  for (const wanted of expectedLabels) {
+    const hasLabel = actualLowercased.includes(wanted.toLowerCase());
+
+    logAssertion(`Label "${wanted}" present`, hasLabel, `Found: ${actualListing}`);
+
+    if (hasLabel) continue;
+
+    throw new Error(
+      `Expected message ${messageId} to have label "${wanted}", ` +
+        `but found: [${actualListing}]`,
+    );
+  }
+}
+
+/**
+ * Checks that some draft exists on the given thread.
+ *
+ * Searches up to 50 drafts from the provider for the first one whose threadId
+ * matches, and throws when there is none (or it has no ID). Returns that
+ * draft's ID and the plain-text content obtained by fetching it.
+ */
+export async function assertDraftExists(options: {
+  provider: EmailProvider;
+  threadId: string;
+}): Promise<{ draftId: string; content: string | undefined }> {
+  const { provider, threadId } = options;
+
+  logStep("Checking draft exists", { threadId });
+
+  const candidates = await provider.getDrafts({ maxResults: 50 });
+  const draftId = candidates.find(
+    (candidate) => candidate.threadId === threadId,
+  )?.id;
+
+  if (!draftId) {
+    throw new Error(`Expected draft for thread ${threadId}, but none found`);
+  }
+
+  // Fetch the draft itself to read its content
+  const fetched = await provider.getDraft(draftId);
+
+  logAssertion("Draft exists", true, `Draft ID: ${draftId}`);
+
+  return { draftId, content: fetched?.textPlain };
+}
+
+/**
+ * Assert that a draft does not exist (was deleted).
+ *
+ * Passes when `provider.getDraft` returns nothing or throws an Error. Note
+ * that any provider Error, even a transient one, counts as "deleted".
+ *
+ * @throws Error when the draft can still be fetched
+ */
+export async function assertDraftDeleted(options: {
+  provider: EmailProvider;
+  draftId: string;
+}): Promise<void> {
+  const { provider, draftId } = options;
+  logStep("Checking draft deleted", { draftId });
+
+  let draft: unknown = null;
+  try {
+    draft = await provider.getDraft(draftId);
+  } catch (error) {
+    // Guard only the lookup so our own failure below is never swallowed.
+    if (!(error instanceof Error)) throw error;
+  }
+
+  logAssertion("Draft deleted", !draft);
+  if (draft) {
+    throw new Error(`Expected draft ${draftId} to be deleted, but it exists`);
+  }
+}
+
+/**
+ * Assert that a message belongs to the expected thread.
+ *
+ * Fetches the message through `provider.getMessage`, logs the comparison of
+ * its `threadId` with `expectedThreadId`, and fails on a mismatch.
+ *
+ * @throws Error when the message's thread id differs from the expected one
+ */
+export async function assertMessageInThread(options: {
+  provider: EmailProvider;
+  messageId: string;
+  expectedThreadId: string;
+}): Promise<void> {
+  const { provider, messageId, expectedThreadId } = options;
+  logStep("Checking message thread", { messageId, expectedThreadId });
+
+  const { threadId: actualThreadId } = await provider.getMessage(messageId);
+  const matches = actualThreadId === expectedThreadId;
+
+  const summary = `Expected: ${expectedThreadId}, Got: ${actualThreadId}`;
+  logAssertion("Message in correct thread", matches, summary);
+  if (matches) return;
+
+  throw new Error(
+    `Expected message ${messageId} to be in thread ${expectedThreadId}, ` +
+      `but it's in thread ${actualThreadId}`,
+  );
+}
+
+/** Fixed test email scenarios worded for predictable AI classification.
+ * NOTE(review): presumably matching any one `expectedLabels` entry is enough.
+ */
+export const TEST_EMAIL_SCENARIOS = {
+  /** Explicit, urgent request - clearly needs a reply */
+  NEEDS_REPLY: {
+    subject: "Please send me the Q4 sales report ASAP",
+    body:
+      "Hi, I need the Q4 sales report for the board meeting tomorrow. " +
+      "Can you please send it to me as soon as possible? Thanks!",
+    expectedLabels: ["Needs Reply", "To Reply", "Action Required"],
+  },
+
+  /** Informational only - the body states that no action is needed */
+  FYI_ONLY: {
+    subject: "FYI: Q4 report is attached",
+    body:
+      "Here's the report you requested. No action needed on your end. " +
+      "Just keeping you in the loop.",
+    expectedLabels: ["FYI", "Informational", "No Reply Needed"],
+  },
+
+  /** Thank you / acknowledgment - same expected labels as FYI_ONLY */
+  THANK_YOU: {
+    subject: "Thanks for the update!",
+    body: "Thank you for sending the report. I really appreciate it!",
+    expectedLabels: ["FYI", "Informational", "No Reply Needed"],
+  },
+
+  /** Direct question that needs a response */
+  QUESTION: {
+    subject: "Quick question about the project",
+    body:
+      "Hey, do you know when the next team meeting is scheduled? " +
+      "I want to make sure I have the materials ready.",
+    expectedLabels: ["Needs Reply", "To Reply", "Question"],
+  },
+} as const;
+
+/**
+ * Clean up test emails from inbox by marking their threads as read.
+ *
+ * Checks the messages from `getInboxMessages(50)` for `subjectPrefix`.
+ * Best effort: a failed `markRead` is logged and skipped, never thrown.
+ *
+ * @returns number of matching messages whose thread was marked as read
+ */
+export async function cleanupTestEmails(options: {
+  provider: EmailProvider;
+  subjectPrefix: string;
+  markAsRead?: boolean;
+}): Promise<number> {
+  const { provider, subjectPrefix, markAsRead = true } = options;
+  logStep("Cleaning up test emails", { subjectPrefix });
+
+  const messages = await provider.getInboxMessages(50);
+  let cleaned = 0;
+  for (const msg of messages) {
+    if (!markAsRead || !msg.subject?.includes(subjectPrefix)) continue;
+    if (!msg.threadId) continue; // markRead needs the thread id, not msg.id
+    try {
+      await provider.markRead(msg.threadId);
+      cleaned++;
+    } catch (error) {
+      logStep("Cleanup: markRead failed", { error: String(error) });
+    }
+  }
+
+  logStep("Cleanup complete", { messagesProcessed: cleaned });
+  return cleaned;
+}
diff --git a/apps/web/__tests__/e2e/flows/helpers/logging.ts b/apps/web/__tests__/e2e/flows/helpers/logging.ts
new file mode 100644
index 0000000000..67c45aa19c
--- /dev/null
+++ b/apps/web/__tests__/e2e/flows/helpers/logging.ts
@@ -0,0 +1,140 @@
+/**
+ * Logging utilities for E2E flow tests
+ *
+ * Provides verbose logging for debugging test failures
+ */
+
+import { E2E_RUN_ID } from "../config";
+
+interface WebhookPayload {
+  timestamp: Date; // when logWebhook recorded the entry
+  provider: "google" | "microsoft";
+  payload: unknown; // stored as received, not validated
+}
+
+interface ApiCall {
+  timestamp: Date; // when logApiCall recorded the entry
+  method: string;
+  endpoint: string;
+  request?: unknown;
+  response?: unknown;
+  duration: number; // printed with an "ms" suffix by logApiCall
+}
+
+// In-memory log storage (module-level; emptied in place by clearLogs)
+const webhookLog: WebhookPayload[] = [];
+const apiCallLog: ApiCall[] = [];
+
+/**
+ * Record a webhook payload in the in-memory log and echo it to the console.
+ *
+ * The entry is timestamped at the moment this function is called.
+ */
+export function logWebhook(
+  provider: "google" | "microsoft",
+  payload: unknown,
+): void {
+  webhookLog.push({ timestamp: new Date(), provider, payload });
+
+  // Pretty-printed with two-space indentation for readable console output
+  const prettyPayload = JSON.stringify(payload, null, 2);
+  console.log(
+    `[E2E-${E2E_RUN_ID}] Webhook received from ${provider}:`,
+    prettyPayload,
+  );
+}
+
+/**
+ * Record an API call in the in-memory log.
+ *
+ * Every call is stored; a one-line summary (method, endpoint, duration) is
+ * printed only when the E2E_VERBOSE environment variable equals "true".
+ *
+ * @param request  stored as given; never printed
+ * @param response stored as given; never printed
+ * @param duration printed with an "ms" suffix
+ */
+export function logApiCall(
+  method: string,
+  endpoint: string,
+  request: unknown,
+  response: unknown,
+  duration: number,
+): void {
+  const timestamp = new Date();
+  apiCallLog.push({ timestamp, method, endpoint, request, response, duration });
+
+  const verbose = process.env.E2E_VERBOSE === "true";
+  if (!verbose) return;
+
+  console.log(
+    `[E2E-${E2E_RUN_ID}] API ${method} ${endpoint} (${duration}ms)`,
+  );
+}
+
+/**
+ * Shallow-copy snapshot of the webhook payloads logged so far
+ */
+export function getWebhookLog(): WebhookPayload[] {
+  return webhookLog.slice();
+}
+
+/**
+ * Shallow-copy snapshot of the API calls logged so far
+ */
+export function getApiCallLog(): ApiCall[] {
+  return apiCallLog.slice();
+}
+
+/**
+ * Empty both in-memory logs in place (call between tests)
+ */
+export function clearLogs(): void {
+  webhookLog.splice(0);
+  apiCallLog.splice(0);
+}
+
+/**
+ * Print a test step; `details`, when provided, is appended as JSON
+ */
+export function logStep(step: string, details?: Record<string, unknown>): void {
+  const suffix = details ? ` - ${JSON.stringify(details)}` : "";
+  console.log(`[E2E-${E2E_RUN_ID}] ${step}${suffix}`);
+}
+
+/**
+ * Print "[PASS]" or "[FAIL]" for an assertion; logs only, never throws
+ */
+export function logAssertion(
+  name: string,
+  passed: boolean,
+  details?: string,
+): void {
+  let line = `[E2E-${E2E_RUN_ID}] [${passed ? "PASS" : "FAIL"}] ${name}`;
+  if (details) line += ` (${details})`;
+  console.log(line);
+}
+
+/**
+ * Print the end-of-test summary block; the error line appears only if set
+ */
+export function logTestSummary(
+  testName: string,
+  result: {
+    passed: boolean;
+    duration: number;
+    webhooksReceived: number;
+    apiCalls: number;
+    error?: string;
+  },
+): void {
+  [
+    `\n[E2E-${E2E_RUN_ID}] ===== Test Summary: ${testName} =====`,
+    `  Status: ${result.passed ? "PASSED" : "FAILED"}`,
+    `  Duration: ${result.duration}ms`,
+    `  Webhooks received: ${result.webhooksReceived}`,
+    `  API calls: ${result.apiCalls}`,
+    ...(result.error ? [`  Error: ${result.error}`] : []),
+    "========================================\n",
+  ].forEach((line) => console.log(line));
+}
diff --git a/apps/web/__tests__/e2e/flows/helpers/polling.ts b/apps/web/__tests__/e2e/flows/helpers/polling.ts
new file mode 100644
index 0000000000..828e52adb6
--- /dev/null
+++ b/apps/web/__tests__/e2e/flows/helpers/polling.ts
@@ -0,0 +1,349 @@
+/**
+ * Polling utilities for E2E flow tests
+ *
+ * These helpers poll database/API state until expected
+ * conditions are met, with configurable timeouts.
+ */
+
+import prisma from "@/utils/prisma";
+import type { EmailProvider } from "@/utils/email/types";
+import { TIMEOUTS } from "../config";
+import { logStep } from "./logging";
+import { sleep } from "@/utils/sleep";
+
+interface PollOptions {
+  timeout?: number; // defaults to TIMEOUTS.WEBHOOK_PROCESSING in pollUntil
+  interval?: number; // defaults to TIMEOUTS.POLL_INTERVAL in pollUntil
+  description?: string; // used in log and timeout messages
+}
+
+/**
+ * Poll `condition` until it resolves to a truthy value, which is returned;
+ * falsy results and rejections mean "not yet". The condition is checked at
+ * least once and a final time at the deadline. Throws Error on timeout.
+ */
+export async function pollUntil<T>(
+  condition: () => Promise<T | null | undefined>,
+  options: PollOptions = {},
+): Promise<T> {
+  const {
+    timeout = TIMEOUTS.WEBHOOK_PROCESSING,
+    interval = TIMEOUTS.POLL_INTERVAL,
+    description = "condition",
+  } = options;
+
+  const startTime = Date.now();
+  const elapsedMs = () => Date.now() - startTime;
+  let lastError: Error | null = null;
+
+  for (;;) {
+    try {
+      const result = await condition();
+      if (result) {
+        logStep(`Condition met: ${description}`, { elapsed: elapsedMs() });
+        return result;
+      }
+    } catch (error) {
+      lastError = error instanceof Error ? error : new Error(String(error));
+    }
+    const remaining = timeout - elapsedMs();
+    if (remaining <= 0) break;
+    await sleep(Math.min(interval, remaining)); // never sleep past deadline
+  }
+  throw new Error(
+    `Timeout waiting for ${description} after ${elapsedMs()}ms` +
+      (lastError ? `: ${lastError.message}` : ""),
+  );
+}
+
+/**
+ * Wait for an ExecutedRule to be created for a message.
+ *
+ * Polls the database for the newest ExecutedRule (by `createdAt`) matching
+ * the message and email account, and resolves with it and its action items.
+ *
+ * @param options.timeout max wait in ms (default TIMEOUTS.WEBHOOK_PROCESSING)
+ * @returns id, rule id, status and action items of the executed rule
+ * @throws Error (from pollUntil) when none appears within `timeout`
+ */
+export async function waitForExecutedRule(options: {
+  messageId: string;
+  emailAccountId: string;
+  timeout?: number;
+}): Promise<{
+  id: string;
+  ruleId: string | null;
+  status: string;
+  actionItems: Array<{
+    id: string;
+    type: string;
+    draftId: string | null;
+    labelId: string | null;
+  }>;
+}> {
+  const {
+    messageId,
+    emailAccountId,
+    timeout = TIMEOUTS.WEBHOOK_PROCESSING,
+  } = options;
+
+  logStep("Waiting for ExecutedRule", { messageId, emailAccountId });
+
+  const findLatest = async () => {
+    const row = await prisma.executedRule.findFirst({
+      where: {
+        messageId,
+        emailAccountId,
+      },
+      include: {
+        actionItems: {
+          select: { id: true, type: true, draftId: true, labelId: true },
+        },
+      },
+      // Newest first, so a repeated execution wins over an older one
+      orderBy: { createdAt: "desc" },
+    });
+    if (!row) return null;
+
+    // Copy only the fields that are part of this helper's return type
+    return {
+      id: row.id,
+      ruleId: row.ruleId,
+      status: row.status,
+      actionItems: row.actionItems.map(({ id, type, draftId, labelId }) => ({
+        id,
+        type,
+        draftId,
+        labelId,
+      })),
+    };
+  };
+
+  return pollUntil(findLatest, {
+    timeout,
+    description: `ExecutedRule for message ${messageId}`,
+  });
+}
+
+/**
+ * Wait for a draft to be created for a thread.
+ *
+ * Polls for the newest DRAFT_EMAIL ExecutedAction with a draft id on the
+ * thread, then confirms through `provider.getDraft` that the draft exists.
+ *
+ * @returns the draft id and the draft's plain-text content
+ * @throws Error (from pollUntil) when no such draft appears within `timeout`
+ */
+export async function waitForDraft(options: {
+  threadId: string;
+  emailAccountId: string;
+  provider: EmailProvider;
+  timeout?: number;
+}): Promise<{ draftId: string; content: string | undefined }> {
+  const {
+    threadId,
+    emailAccountId,
+    provider,
+    timeout = TIMEOUTS.WEBHOOK_PROCESSING,
+  } = options;
+
+  logStep("Waiting for draft", { threadId, emailAccountId });
+
+  const findDraft = async () => {
+    // Look up the draft id recorded on the executed action
+    const action = await prisma.executedAction.findFirst({
+      where: {
+        executedRule: {
+          emailAccountId,
+          threadId,
+        },
+        type: "DRAFT_EMAIL",
+        draftId: { not: null },
+      },
+      orderBy: {
+        createdAt: "desc",
+      },
+    });
+    const draftId = action?.draftId;
+    if (!draftId) return null;
+
+    // A recorded id is not enough: the provider must still return the draft
+    const draft = await provider.getDraft(draftId);
+    if (!draft) return null;
+
+    return { draftId, content: draft.textPlain };
+  };
+
+  return pollUntil(findDraft, {
+    timeout,
+    description: `Draft for thread ${threadId}`,
+  });
+}
+
+/**
+ * Wait until one of the message's `labelIds` equals `labelName` (ignoring
+ * case). NOTE(review): ids are compared to a name - confirm they coincide.
+ */
+export async function waitForLabel(options: {
+  messageId: string;
+  labelName: string;
+  provider: EmailProvider;
+  timeout?: number;
+}): Promise<void> {
+  const {
+    messageId,
+    labelName,
+    provider,
+    timeout = TIMEOUTS.WEBHOOK_PROCESSING,
+  } = options;
+
+  logStep("Waiting for label", { messageId, labelName });
+
+  const labelApplied = async () => {
+    const { labelIds } = await provider.getMessage(messageId);
+    const found = labelIds?.some(
+      (id) => id.toLowerCase() === labelName.toLowerCase(),
+    );
+    return found ? true : null;
+  };
+
+  await pollUntil(labelApplied, {
+    timeout,
+    description: `Label "${labelName}" on message ${messageId}`,
+  });
+}
+
+/**
+ * Wait for a message to appear in inbox (useful after sending).
+ *
+ * Polls `provider.getInboxMessages(20)` and looks at the first message whose
+ * subject contains `subjectContains`; resolves once that message has both an
+ * id and a thread id. An older message with a matching subject also counts,
+ * so pass a substring that is unique to the message being awaited.
+ *
+ * @throws Error (from pollUntil) when nothing matches within `timeout`
+ */
+export async function waitForMessageInInbox(options: {
+  provider: EmailProvider;
+  subjectContains: string;
+  timeout?: number;
+}): Promise<{ messageId: string; threadId: string }> {
+  const {
+    provider,
+    subjectContains,
+    timeout = TIMEOUTS.EMAIL_DELIVERY,
+  } = options;
+
+  logStep("Waiting for message in inbox", { subjectContains });
+
+  const findInInbox = async () => {
+    const inbox = await provider.getInboxMessages(20);
+    const match = inbox.find((m) => m.subject?.includes(subjectContains));
+    // Only the first subject match is considered; it must carry both ids
+    if (!match?.id || !match.threadId) return null;
+
+    return { messageId: match.id, threadId: match.threadId };
+  };
+
+  return pollUntil(findInInbox, {
+    timeout,
+    description: `Message with subject containing "${subjectContains}"`,
+  });
+}
+
+/**
+ * Wait for draft to be deleted (cleanup verification).
+ *
+ * The draft counts as deleted when `provider.getDraft` resolves to `null` or
+ * throws; any other result means it still exists and polling continues.
+ */
+export async function waitForDraftDeleted(options: {
+  draftId: string;
+  provider: EmailProvider;
+  timeout?: number;
+}): Promise<void> {
+  const { draftId, provider, timeout = TIMEOUTS.WEBHOOK_PROCESSING } = options;
+  logStep("Waiting for draft deletion", { draftId });
+
+  const isDeleted = async () => {
+    let draft: unknown;
+    try {
+      draft = await provider.getDraft(draftId);
+    } catch {
+      return true; // lookup failure is taken to mean the draft is gone
+    }
+    return draft === null ? true : null; // non-null: still exists, keep polling
+  };
+
+  await pollUntil(isDeleted, {
+    timeout,
+    description: `Draft ${draftId} to be deleted`,
+  });
+}
+
+/**
+ * Wait for DraftSendLog to be recorded.
+ *
+ * A DraftSendLog is reached through its ExecutedAction, whose ExecutedRule
+ * carries the thread and email account; the newest matching log is returned
+ * together with `draftId` and `wasDraftSent` of that ExecutedAction.
+ *
+ * @returns the log's id, sent message id and similarity score, plus the above
+ * @throws Error (from pollUntil) when no log appears within `timeout`
+ */
+export async function waitForDraftSendLog(options: {
+  threadId: string;
+  emailAccountId: string;
+  timeout?: number;
+}): Promise<{
+  id: string;
+  sentMessageId: string;
+  similarityScore: number;
+  draftId: string | null;
+  wasSentFromDraft: boolean | null;
+}> {
+  const {
+    threadId,
+    emailAccountId,
+    timeout = TIMEOUTS.WEBHOOK_PROCESSING,
+  } = options;
+
+  logStep("Waiting for DraftSendLog", { threadId, emailAccountId });
+
+  const findLatestLog = async () => {
+    const entry = await prisma.draftSendLog.findFirst({
+      // Filter through the ExecutedAction -> ExecutedRule relation chain
+      where: {
+        executedAction: {
+          executedRule: { threadId, emailAccountId },
+        },
+      },
+      include: {
+        executedAction: {
+          select: {
+            draftId: true,
+            wasDraftSent: true,
+          },
+        },
+      },
+      orderBy: {
+        createdAt: "desc",
+      },
+    });
+    if (!entry) return null;
+
+    const { id, sentMessageId, similarityScore, executedAction } = entry;
+    return {
+      id,
+      sentMessageId,
+      similarityScore,
+      draftId: executedAction.draftId,
+      wasSentFromDraft: executedAction.wasDraftSent,
+    };
+  };
+
+  return pollUntil(findLatestLog, {
+    timeout,
+    description: `DraftSendLog for thread ${threadId}`,
+  });
+}
diff --git a/apps/web/__tests__/e2e/flows/helpers/webhook.ts b/apps/web/__tests__/e2e/flows/helpers/webhook.ts
new file mode 100644
index 0000000000..269f59386d
--- /dev/null
+++ b/apps/web/__tests__/e2e/flows/helpers/webhook.ts
@@ -0,0 +1,154 @@
+/**
+ * Webhook subscription management for E2E flow tests
+ *
+ * Handles setting up and tearing down webhook subscriptions
+ * for test accounts to receive real webhook notifications.
+ */
+
+import prisma from "@/utils/prisma";
+import type { TestAccount } from "./accounts";
+import { logStep } from "./logging";
+import { createScopedLogger } from "@/utils/logger";
+
+const logger = createScopedLogger("e2e-webhook");
+
+/**
+ * Set up webhook subscription for a test account.
+ *
+ * Delegates to the account's `emailProvider.watchEmails()`, the existing
+ * watch functionality, which registers with Google Pub/Sub for Gmail and
+ * creates a Microsoft Graph subscription for Outlook. The webhook URL comes
+ * from environment configuration (NEXT_PUBLIC_BASE_URL or specific URLs).
+ *
+ * On success the subscription id and expiration date are saved on the
+ * EmailAccount row and returned; when `watchEmails` yields nothing an empty
+ * object is returned and the database is left untouched.
+ *
+ * @throws Error wrapping any failure from the provider or the database update
+ */
+export async function setupTestWebhookSubscription(
+  account: TestAccount,
+): Promise<{
+  subscriptionId?: string;
+  expirationDate?: Date;
+}> {
+  const { email, provider } = account;
+  logStep("Setting up webhook subscription", { email, provider });
+
+  try {
+    const watch = await account.emailProvider.watchEmails();
+
+    if (!watch) {
+      logStep("Webhook subscription returned no result");
+      return {};
+    }
+
+    const { subscriptionId, expirationDate } = watch;
+    logStep("Webhook subscription created", {
+      subscriptionId,
+      expirationDate,
+    });
+
+    // Persist the subscription so later checks can see that it is active
+    await prisma.emailAccount.update({
+      where: { id: account.id },
+      data: {
+        watchEmailsExpirationDate: expirationDate,
+        watchEmailsSubscriptionId: subscriptionId,
+      },
+    });
+
+    return { subscriptionId, expirationDate };
+  } catch (error) {
+    logger.error("Failed to set up webhook subscription", { error });
+    throw new Error(
+      `Failed to set up webhook subscription for ${account.email}: ${error}`,
+    );
+  }
+}
+
+/**
+ * Tear down webhook subscription for a test account.
+ *
+ * Reads the stored subscription id, asks the provider to stop watching, then
+ * clears the subscription fields in the database. Best effort: failures are
+ * logged as warnings and never thrown.
+ */
+export async function teardownTestWebhookSubscription(
+  account: TestAccount,
+): Promise<void> {
+  const { id, email, provider, emailProvider } = account;
+  logStep("Tearing down webhook subscription", { email, provider });
+
+  try {
+    const stored = await prisma.emailAccount.findUnique({
+      where: { id },
+      select: { watchEmailsSubscriptionId: true },
+    });
+    // A missing or empty id is passed on as undefined
+    const subscriptionId = stored?.watchEmailsSubscriptionId || undefined;
+    await emailProvider.unwatchEmails(subscriptionId);
+
+    // Clear the stored subscription so it is no longer reported as active
+    await prisma.emailAccount.update({
+      where: { id },
+      data: {
+        watchEmailsExpirationDate: null,
+        watchEmailsSubscriptionId: null,
+      },
+    });
+
+    logStep("Webhook subscription torn down");
+  } catch (error) {
+    // Cleanup is best effort, so the error is logged rather than rethrown
+    logger.warn("Error tearing down webhook subscription", { error });
+  }
+}
+
+/**
+ * Verify webhook subscription is active for an account.
+ *
+ * Based solely on the expiration date stored in the database: returns false
+ * (without logging) when none is stored, otherwise whether it is still in
+ * the future. The provider itself is not queried.
+ */
+export async function verifyWebhookSubscription(
+  account: TestAccount,
+): Promise<boolean> {
+  const stored = await prisma.emailAccount.findUnique({
+    where: { id: account.id },
+    select: {
+      watchEmailsExpirationDate: true,
+      watchEmailsSubscriptionId: true,
+    },
+  });
+  // No stored expiration date means there is no known subscription
+  if (!stored?.watchEmailsExpirationDate) return false;
+
+  const isActive = new Date(stored.watchEmailsExpirationDate) > new Date();
+
+  logStep("Webhook subscription status", {
+    email: account.email,
+    isActive,
+    expirationDate: stored.watchEmailsExpirationDate,
+    subscriptionId: stored.watchEmailsSubscriptionId,
+  });
+
+  return isActive;
+}
+
+/**
+ * Ensure webhook subscription is active, creating if needed.
+ * Activity is judged by verifyWebhookSubscription.
+ */
+export async function ensureWebhookSubscription(
+  account: TestAccount,
+): Promise<void> {
+  if (await verifyWebhookSubscription(account)) {
+    logStep("Webhook subscription already active");
+    return;
+  }
+
+  logStep("Webhook subscription not active, setting up");
+  await setupTestWebhookSubscription(account);
+}
diff --git a/apps/web/__tests__/e2e/flows/outbound-tracking.test.ts b/apps/web/__tests__/e2e/flows/outbound-tracking.test.ts
new file mode 100644
index 0000000000..eca7023152
--- /dev/null
+++ b/apps/web/__tests__/e2e/flows/outbound-tracking.test.ts
@@ -0,0 +1,239 @@
+/**
+ * E2E Flow Test: Outbound Message Tracking
+ *
+ * Tests that sent messages trigger correct outbound handling:
+ * - SENT folder webhook triggers processing
+ * - Reply tracking is updated
+ * - No duplicate rule execution
+ *
+ * Usage:
+ * RUN_E2E_FLOW_TESTS=true pnpm test-e2e outbound-tracking
+ */
+
+import { describe, test, expect, beforeAll, afterEach } from "vitest";
+import prisma from "@/utils/prisma";
+import { shouldRunFlowTests, TIMEOUTS, getTestSubjectPrefix } from "./config";
+import { initializeFlowTests, setupFlowTest } from "./setup";
+import { generateTestSummary } from "./teardown";
+import { sendTestEmail, sendTestReply } from "./helpers/email";
+import { waitForMessageInInbox } from "./helpers/polling";
+import { logStep, clearLogs } from "./helpers/logging";
+import type { TestAccount } from "./helpers/accounts";
+
+describe.skipIf(!shouldRunFlowTests())("Outbound Message Tracking", () => {
+  let gmail: TestAccount; // assigned in beforeAll
+  let outlook: TestAccount; // assigned in beforeAll
+  let testStartTime: number; // set by each test, read in afterEach
+
+  beforeAll(async () => {
+    // One-time initialization first, then load the two accounts that the
+    // tests below share
+    await initializeFlowTests();
+    ({ gmail, outlook } = await setupFlowTest());
+  }, TIMEOUTS.TEST_DEFAULT);
+
+  afterEach(async () => {
+    generateTestSummary("Outbound Tracking", testStartTime);
+    clearLogs(); // empty the in-memory logs before the next test
+  });
+
+  test(
+    "should track outbound message when user sends email",
+    async () => {
+      testStartTime = Date.now();
+
+      // ========================================
+      // Step 1: Receive an email first (the wait matches any run-prefixed mail)
+      // ========================================
+      logStep("Step 1: Setting up thread with incoming email");
+
+      const incomingEmail = await sendTestEmail({
+        from: gmail,
+        to: outlook,
+        subject: "Outbound tracking test",
+        body: "Please respond to this email.",
+      });
+
+      const receivedMessage = await waitForMessageInInbox({
+        provider: outlook.emailProvider,
+        subjectContains: getTestSubjectPrefix(),
+        timeout: TIMEOUTS.EMAIL_DELIVERY,
+      });
+
+      logStep("Email received in Outlook", {
+        messageId: receivedMessage.messageId,
+        threadId: receivedMessage.threadId,
+      });
+
+      // ========================================
+      // Step 2: Send reply from Outlook (outbound message)
+      // ========================================
+      logStep("Step 2: Sending outbound reply from Outlook");
+
+      const sentReply = await sendTestReply({
+        from: outlook,
+        to: gmail,
+        threadId: receivedMessage.threadId,
+        originalMessageId: receivedMessage.messageId,
+        body: "Here is my response to your email.",
+      });
+
+      logStep("Outbound reply sent", {
+        messageId: sentReply.messageId,
+        threadId: sentReply.threadId,
+      });
+
+      // ========================================
+      // Step 3: Wait for outbound handling to process
+      // ========================================
+      logStep("Step 3: Waiting for outbound handling");
+
+      // NOTE(review): nothing in this step asserts that outbound handling
+      // (e.g. handleOutboundMessage) actually ran; it only pauses.
+
+      // Fixed 5 second pause for async processing (not a poll on any state)
+      await new Promise((resolve) => setTimeout(resolve, 5000));
+
+      // ========================================
+      // Step 4: Verify Gmail receives the reply (subject is not run-specific)
+      // ========================================
+      logStep("Step 4: Verifying Gmail receives reply");
+
+      const gmailReceived = await waitForMessageInInbox({
+        provider: gmail.emailProvider,
+        subjectContains: "Outbound tracking test",
+        timeout: TIMEOUTS.EMAIL_DELIVERY,
+      });
+
+      expect(gmailReceived.threadId).toBe(incomingEmail.threadId);
+
+      logStep("Reply received in Gmail, thread continuity verified");
+    },
+    TIMEOUTS.FULL_CYCLE,
+  );
+
+  test(
+    "should not create duplicate ExecutedRule for outbound messages",
+    async () => {
+      testStartTime = Date.now();
+
+      // ========================================
+      // Setup: Create a thread (prefix wait may match an earlier test's mail)
+      // ========================================
+      logStep("Setting up thread");
+
+      await sendTestEmail({
+        from: gmail,
+        to: outlook,
+        subject: "No duplicate test",
+        body: "Testing no duplicate processing.",
+      });
+
+      const receivedMessage = await waitForMessageInInbox({
+        provider: outlook.emailProvider,
+        subjectContains: getTestSubjectPrefix(),
+        timeout: TIMEOUTS.EMAIL_DELIVERY,
+      });
+
+      // ========================================
+      // Send outbound message
+      // ========================================
+      logStep("Sending outbound message");
+
+      const reply = await sendTestReply({
+        from: outlook,
+        to: gmail,
+        threadId: receivedMessage.threadId,
+        originalMessageId: receivedMessage.messageId,
+        body: "This is a manual reply.",
+      });
+
+      // Fixed 10 second pause: the absence of a row cannot be polled for
+      await new Promise((resolve) => setTimeout(resolve, 10_000));
+
+      // ========================================
+      // Verify no ExecutedRule was created for the outbound message
+      // ========================================
+      logStep("Verifying no ExecutedRule for outbound message");
+
+      const executedRulesForSent = await prisma.executedRule.findMany({
+        where: {
+          emailAccountId: outlook.id,
+          messageId: reply.messageId,
+        },
+      });
+
+      // Outbound messages should not trigger rule execution
+      expect(executedRulesForSent).toHaveLength(0);
+
+      logStep("ExecutedRules for outbound message", {
+        count: executedRulesForSent.length,
+      });
+    },
+    TIMEOUTS.TEST_DEFAULT,
+  );
+
+  test(
+    "should update reply tracking when reply is sent",
+    async () => {
+      testStartTime = Date.now();
+
+      // ========================================
+      // Setup: Create incoming email (prefix wait may match an earlier test)
+      // ========================================
+      logStep("Setting up incoming email");
+
+      await sendTestEmail({
+        from: gmail,
+        to: outlook,
+        subject: "Reply tracking update test",
+        body: "Please let me know your thoughts.",
+      });
+
+      const receivedMessage = await waitForMessageInInbox({
+        provider: outlook.emailProvider,
+        subjectContains: getTestSubjectPrefix(),
+        timeout: TIMEOUTS.EMAIL_DELIVERY,
+      });
+
+      // ========================================
+      // Send reply
+      // ========================================
+      logStep("Sending reply");
+
+      await sendTestReply({
+        from: outlook,
+        to: gmail,
+        threadId: receivedMessage.threadId,
+        originalMessageId: receivedMessage.messageId,
+        body: "Here are my thoughts on this matter.",
+      });
+
+      // ========================================
+      // Wait for reply tracking to update
+      // ========================================
+      logStep("Waiting for reply tracking update");
+
+      // Fixed 10 second pause to let reply tracking run before querying
+      await new Promise((resolve) => setTimeout(resolve, 10_000));
+
+      // Verify the thread is now marked as "replied to"
+      const threadTracker = await prisma.threadTracker.findFirst({
+        where: {
+          emailAccountId: outlook.id,
+          threadId: receivedMessage.threadId,
+        },
+      });
+
+      // findFirst yields null (not undefined) when no row exists: check null
+      expect(threadTracker).not.toBeNull();
+      expect(threadTracker?.resolved).toBe(true);
+
+      logStep("Reply tracking found", {
+        resolved: threadTracker?.resolved,
+        type: threadTracker?.type,
+      });
+    },
+    TIMEOUTS.TEST_DEFAULT,
+  );
+});
diff --git a/apps/web/__tests__/e2e/flows/setup.ts b/apps/web/__tests__/e2e/flows/setup.ts
new file mode 100644
index 0000000000..75baa48388
--- /dev/null
+++ b/apps/web/__tests__/e2e/flows/setup.ts
@@ -0,0 +1,100 @@
+/**
+ * Global setup for E2E flow tests
+ *
+ * This file is run once before all flow tests.
+ * It sets up webhook subscriptions and validates configuration.
+ */
+
+import { vi } from "vitest";
+import {
+  validateConfig,
+  E2E_RUN_ID,
+  E2E_GMAIL_EMAIL,
+  E2E_OUTLOOK_EMAIL,
+} from "./config";
+import {
+  getGmailTestAccount,
+  getOutlookTestAccount,
+  ensureTestPremium,
+  ensureTestRules,
+} from "./helpers/accounts";
+import { ensureWebhookSubscription } from "./helpers/webhook";
+import { logStep } from "./helpers/logging";
+
+// Replace the `server-only` module with an empty stub (Next.js specific)
+vi.mock("server-only", () => ({}));
+
+// Mock the message processing lock: markMessageAsProcessing resolves to true
+vi.mock("@/utils/redis/message-processing", () => ({
+  markMessageAsProcessing: vi.fn().mockResolvedValue(true),
+}));
+
+// Mock Next.js after() so its callback runs inline instead of being deferred.
+// NOTE(review): intended to let webhook processing finish before assertions.
+vi.mock("next/server", async () => {
+  const actual =
+    await vi.importActual<typeof import("next/server")>("next/server");
+  return {
+    ...actual,
+    after: async (fn: () => void | Promise<void>) => {
+      // Invoke the callback immediately and await its completion
+      await fn();
+    },
+  };
+});
+
+/**
+ * Initialize test environment.
+ *
+ * Call this in beforeAll of your test suites. Validates the configuration,
+ * then prepares the Gmail and Outlook test accounts in turn: premium status,
+ * rules and an active webhook subscription.
+ *
+ * @throws Error listing every configuration problem when validation fails
+ */
+export async function initializeFlowTests(): Promise<void> {
+  logStep("=== E2E Flow Tests Initialization ===");
+  logStep("Run ID", { runId: E2E_RUN_ID });
+
+  const { valid, errors } = validateConfig();
+  if (!valid) {
+    const details = errors.join("\n");
+    throw new Error(`Invalid E2E test configuration:\n${details}`);
+  }
+  logStep("Configuration validated", {
+    gmailEmail: E2E_GMAIL_EMAIL,
+    outlookEmail: E2E_OUTLOOK_EMAIL,
+  });
+
+  const gmail = await getGmailTestAccount();
+  const outlook = await getOutlookTestAccount();
+  const accounts = [gmail, outlook];
+
+  // Each phase finishes for both accounts before the next one starts
+  for (const { userId } of accounts) {
+    await ensureTestPremium(userId);
+  }
+  for (const { id } of accounts) {
+    await ensureTestRules(id);
+  }
+  for (const account of accounts) {
+    await ensureWebhookSubscription(account);
+  }
+
+  logStep("=== Initialization Complete ===");
+}
+
+/**
+ * Setup for individual test files.
+ *
+ * Returns the Gmail and Outlook test accounts, loaded one after the other.
+ */
+export async function setupFlowTest(): Promise<{
+  gmail: Awaited<ReturnType<typeof getGmailTestAccount>>;
+  outlook: Awaited<ReturnType<typeof getOutlookTestAccount>>;
+}> {
+  return {
+    gmail: await getGmailTestAccount(),
+    outlook: await getOutlookTestAccount(),
+  };
+}
diff --git a/apps/web/__tests__/e2e/flows/teardown.ts b/apps/web/__tests__/e2e/flows/teardown.ts
new file mode 100644
index 0000000000..1a00be3eb9
--- /dev/null
+++ b/apps/web/__tests__/e2e/flows/teardown.ts
@@ -0,0 +1,111 @@
+/**
+ * Global teardown for E2E flow tests
+ *
+ * This file provides cleanup functions for flow tests.
+ */
+
+import { getTestSubjectPrefix } from "./config";
+import {
+  getGmailTestAccount,
+  getOutlookTestAccount,
+  clearAccountCache,
+} from "./helpers/accounts";
+import { cleanupTestEmails } from "./helpers/email";
+import {
+  clearLogs,
+  logStep,
+  logTestSummary,
+  getWebhookLog,
+  getApiCallLog,
+} from "./helpers/logging";
+
+/**
+ * Clean up test artifacts after a test run (best effort - errors are logged)
+ *
+ * Options:
+ * - keepOnFailure: If true (default), skip cleanup when the test failed
+ */
+export async function cleanupFlowTest(options: {
+  testPassed: boolean;
+  keepOnFailure?: boolean;
+}): Promise<void> {
+  const { testPassed, keepOnFailure = true } = options;
+
+  if (!testPassed && keepOnFailure) {
+    logStep("Skipping cleanup - test failed and keepOnFailure is enabled");
+    return;
+  }
+
+  logStep("Cleaning up test artifacts");
+
+  try {
+    const gmail = await getGmailTestAccount();
+    const outlook = await getOutlookTestAccount();
+    const subjectPrefix = getTestSubjectPrefix();
+
+    // allSettled rather than all: a failure in one mailbox must not make us
+    // return while the other cleanup is still running or hide its error
+    const results = await Promise.allSettled(
+      [gmail.emailProvider, outlook.emailProvider].map((provider) =>
+        cleanupTestEmails({ provider, subjectPrefix, markAsRead: true }),
+      ),
+    );
+
+    for (const result of results) {
+      if (result.status === "rejected") {
+        logStep("Error during cleanup", { error: String(result.reason) });
+      }
+    }
+    if (results.every((result) => result.status === "fulfilled")) {
+      logStep("Cleanup complete");
+    }
+  } catch (error) {
+    // Log but don't throw - cleanup is best effort
+    logStep("Error during cleanup", { error: String(error) });
+  }
+}
+
+/**
+ * Full teardown - call once all tests are done (errors are logged, not thrown)
+ */
+export async function teardownFlowTests(): Promise<void> {
+  logStep("=== E2E Flow Tests Teardown ===");
+
+  try {
+    // Load both accounts (Gmail, then Outlook) before clearing anything; the
+    // results are unused - placeholder for a possible webhook teardown later
+    await getGmailTestAccount();
+    await getOutlookTestAccount();
+
+    // Clear account cache
+    clearAccountCache();
+
+    // Clear logs (the completion message below is logged after the clear)
+    clearLogs();
+
+    logStep("=== Teardown Complete ===");
+  } catch (error) {
+    logStep("Error during teardown", { error: String(error) });
+  }
+}
+
+/**
+ * Log one test's summary: pass/fail, elapsed ms, webhook and API call counts
+ */
+export function generateTestSummary(
+  testName: string,
+  startTime: number,
+  error?: Error,
+): void {
+  const elapsedMs = Date.now() - startTime;
+  const webhookCount = getWebhookLog().length;
+  const apiCallCount = getApiCallLog().length;
+  const summary = {
+    passed: !error,
+    duration: elapsedMs,
+    webhooksReceived: webhookCount,
+    apiCalls: apiCallCount,
+    error: error?.message,
+  };
+  logTestSummary(testName, summary);
+}
diff --git a/apps/web/package.json b/apps/web/package.json
index e6dd462d46..a09885d941 100644
--- a/apps/web/package.json
+++ b/apps/web/package.json
@@ -4,6 +4,7 @@
   "private": true,
   "scripts": {
     "dev": "cross-env NODE_OPTIONS=--max_old_space_size=16384 next dev --turbopack",
+    "dev:e2e": "dotenv -e .env.e2e -- cross-env NODE_OPTIONS=--max_old_space_size=16384 next dev --turbopack",
     "build": "cross-env NODE_OPTIONS=--max_old_space_size=16384 prisma migrate deploy && next build",
     "start": "next start",
     "start:standalone": "node .next/standalone/server.js",
@@ -12,6 +13,8 @@
     "test": "cross-env RUN_AI_TESTS=false vitest",
     "test-ai": "cross-env RUN_AI_TESTS=true vitest --run",
     "test-e2e": "cross-env RUN_E2E_TESTS=true vitest --run",
+    "test-e2e:flows": "cross-env RUN_E2E_FLOW_TESTS=true vitest --run --dir __tests__/e2e/flows",
+    "prisma:migrate:e2e": "dotenv -e .env.e2e -- prisma migrate deploy",
     "preinstall": "npx only-allow pnpm",
     "postinstall": "prisma generate"
   },
@@ -196,6 +199,7 @@
     "autoprefixer": "10.4.22",
     "cross-env": "10.1.0",
     "dotenv": "17.2.3",
+    "dotenv-cli": "11.0.0",
     "postcss": "8.5.6",
     "serwist": "9.4.2",
     "tailwindcss": "3.4.17",
diff --git a/apps/web/vitest.config.mts b/apps/web/vitest.config.mts
index 01435a0757..00ecd3718c 100644
--- a/apps/web/vitest.config.mts
+++ b/apps/web/vitest.config.mts
@@ -2,13 +2,16 @@ import { config } from "dotenv";
 import { defineConfig } from "vitest/config";
 import tsconfigPaths from "vite-tsconfig-paths";
 
+const isE2E = process.env.RUN_E2E_FLOW_TESTS === "true";
+const envFile = isE2E ? "./.env.e2e" : "./.env.test";
+
 export default defineConfig({
   plugins: [tsconfigPaths()],
   test: {
     environment: "node",
     setupFiles: ["./__tests__/setup.ts"],
     env: {
-      ...config({ path: "./.env.test" }).parsed,
+      ...config({ path: envFile }).parsed,
     },
   },
 });
diff --git a/package.json b/package.json
index 8e4c0b69f5..afe56fbb86 100644
--- a/package.json
+++ b/package.json
@@ -11,6 +11,7 @@
     "check": "ultracite check",
     "fix": "ultracite fix",
     "setup": "tsx packages/cli/src/main.ts setup",
+    "start:cli": "tsx packages/cli/src/main.ts start",
     "docker:local:build": "./docker/scripts/publish-ghcr.sh --local",
     "docker:local:push": "./docker/scripts/publish-ghcr.sh",
     "docker:local:run": "./docker/scripts/run-local.sh"
diff --git a/packages/cli/src/main.ts b/packages/cli/src/main.ts
index ce0d544455..154f0e8d28 100644
--- a/packages/cli/src/main.ts
+++ b/packages/cli/src/main.ts
@@ -72,6 +72,7 @@ async function main() {
   program
     .command("setup")
     .description("Interactive setup for Inbox Zero")
+    .option("-n, --name <name>", "Configuration name (creates .env.<name>)")
     .action(runSetup);
 
   program
@@ -114,8 +115,9 @@ async function main() {
 // Setup Command
 // ═══════════════════════════════════════════════════════════════════════════
 
-async function runSetup() {
-  p.intro("🚀 Inbox Zero Setup");
+async function runSetup(options: { name?: string }) {
+  const configName = options.name;
+  p.intro(`🚀 Inbox Zero Setup${configName ? ` (${configName})` : ""}`);
 
   // Ask about environment mode
   const envMode = await p.select({
@@ -213,9 +215,10 @@ async function runSetup() {
 
   // Determine paths - if in repo, write to apps/web/.env, otherwise use standalone
   const configDir = REPO_ROOT ?? STANDALONE_CONFIG_DIR;
+  const envFileName = configName ? `.env.${configName}` : ".env";
   const envFile = REPO_ROOT
-    ? resolve(REPO_ROOT, "apps/web/.env")
-    : STANDALONE_ENV_FILE;
+    ? resolve(REPO_ROOT, "apps/web", envFileName)
+    : resolve(STANDALONE_CONFIG_DIR, envFileName);
   const composeFile = REPO_ROOT
     ? resolve(REPO_ROOT, "docker-compose.yml")
     : STANDALONE_COMPOSE_FILE;
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 92144def5b..9279ec9425 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -632,6 +632,9 @@ importers:
       dotenv:
         specifier: 17.2.3
         version: 17.2.3
+      dotenv-cli:
+        specifier: 11.0.0
+        version: 11.0.0
       postcss:
         specifier: 8.5.6
         version: 8.5.6
@@ -7791,6 +7794,14 @@ packages:
     resolution: {integrity: sha512-QM8q3zDe58hqUqjraQOmzZ1LIH9SWQJTlEKCH4kJ2oQvLZk7RbQXvtDM2XEq3fwkV9CCvvH4LA0AV+ogFsBM2Q==}
     engines: {node: '>=8'}
 
+  dotenv-cli@11.0.0:
+    resolution: {integrity: sha512-r5pA8idbk7GFWuHEU7trSTflWcdBpQEK+Aw17UrSHjS6CReuhrrPcyC3zcQBPQvhArRHnBo/h6eLH1fkCvNlww==}
+    hasBin: true
+
+  dotenv-expand@12.0.3:
+    resolution: {integrity: sha512-uc47g4b+4k/M/SeaW1y4OApx+mtLWl92l5LMPP0GNXctZqELk+YGgOPIIC5elYmUH4OuoK3JLhuRUYegeySiFA==}
+    engines: {node: '>=12'}
+
   dotenv@16.6.1:
     resolution: {integrity: sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow==}
     engines: {node: '>=12'}
@@ -21349,6 +21360,17 @@ snapshots:
     dependencies:
       is-obj: 2.0.0
 
+  dotenv-cli@11.0.0:
+    dependencies:
+      cross-spawn: 7.0.6
+      dotenv: 17.2.3
+      dotenv-expand: 12.0.3
+      minimist: 1.2.8
+
+  dotenv-expand@12.0.3:
+    dependencies:
+      dotenv: 16.6.1
+
   dotenv@16.6.1: {}
 
   dotenv@17.2.3: {}