storybookjs · yannbf · May 5, 2026 · May 4, 2026 · May 5, 2026 · May 5, 2026
diff --git a/code/addons/vitest/src/vitest-plugin/index.ts b/code/addons/vitest/src/vitest-plugin/index.ts
@@ -464,8 +464,15 @@ export const storybookTest = async (options?: UserOptions): Promise<Plugin[]> =>
 
       if (isTelemetryModuleEnabled()) {
         // When an agent is running vitest via CLI, inject a reporter that sends
-        // detailed test result telemetry (pass/fail, error analysis, empty renders)
-        if (agent && withinAgenticSetupSession) {
+        // detailed test result telemetry (pass/fail, error analysis, empty renders).
+        //
+        // STORYBOOK_INTERNAL_TEST_RUN is set by the dev server when it spawns
+        // vitest internally (ghost-stories, ai-setup-final-scoring). Those runs
+        // are not part of the agent's iterative self-healing loop, so we skip
+        // installing the reporter to avoid emitting `ai-setup-self-healing-scoring`
+        // events whose results would misleadingly attribute ghost-stories /
+        // final-scoring outcomes to the self-healing loop.
+        if (agent && withinAgenticSetupSession && !process.env.STORYBOOK_INTERNAL_TEST_RUN) {
           context.vitest.config.reporters.push(
             new AgentTelemetryReporter({
               configDir: finalOptions.configDir,

diff --git a/code/core/src/core-server/server-channel/ghost-stories-channel.test.ts b/code/core/src/core-server/server-channel/ghost-stories-channel.test.ts
@@ -197,6 +197,7 @@ describe('ghostStoriesChannel', () => {
         ],
         stdio: 'pipe',
         env: {
+          STORYBOOK_INTERNAL_TEST_RUN: '1',
           STORYBOOK_COMPONENT_PATHS: 'component1.tsx;component2.tsx',
         },
       } as any);
@@ -221,14 +222,6 @@ describe('ghostStoriesChannel', () => {
           runCssCheck: 'not-run',
           runUniqueErrorCount: 0,
           runPassedButEmptyRender: 0,
-          cumulativeTotal: 2,
-          cumulativePassed: 2,
-          cumulativeSuccessRate: 1,
-          cumulativeSuccessRateWithoutEmptyRender: 1,
-          cumulativeCategorizedErrors: expect.any(Object),
-          cumulativeCssCheck: 'not-run',
-          cumulativeUniqueErrorCount: 0,
-          cumulativePassedButEmptyRender: 0,
         },
       });
     });
@@ -303,6 +296,7 @@ describe('ghostStoriesChannel', () => {
         ],
         stdio: 'pipe',
         env: {
+          STORYBOOK_INTERNAL_TEST_RUN: '1',
           STORYBOOK_COMPONENT_PATHS: 'component1.tsx;component2.tsx',
         },
       } as any);

diff --git a/code/core/src/core-server/utils/ghost-stories/parse-vitest-report.test.ts b/code/core/src/core-server/utils/ghost-stories/parse-vitest-report.test.ts
@@ -50,14 +50,6 @@ describe('parse-vitest-report', () => {
         runUniqueErrorCount: 0,
         runCategorizedErrors: {},
         runCssCheck: 'not-run',
-        cumulativeTotal: 3,
-        cumulativePassed: 3,
-        cumulativePassedButEmptyRender: 0,
-        cumulativeSuccessRate: 1.0,
-        cumulativeSuccessRateWithoutEmptyRender: 1.0,
-        cumulativeUniqueErrorCount: 0,
-        cumulativeCategorizedErrors: {},
-        cumulativeCssCheck: 'not-run',
       });
     });
 

diff --git a/code/core/src/core-server/utils/ghost-stories/run-story-tests.ts b/code/core/src/core-server/utils/ghost-stories/run-story-tests.ts
@@ -34,7 +34,14 @@ export async function runStoryTests(
     let testFailureMessage;
 
     try {
-      // Execute the test runner command with specific story files
+      // Execute the test runner command with specific story files.
+      //
+      // STORYBOOK_INTERNAL_TEST_RUN marks this as a dev-server-initiated run
+      // (ghost-stories or ai-setup-final-scoring) so the vitest plugin can
+      // skip telemetry meant for agent-driven external runs. Without this,
+      // ghost-stories runs would trigger `ai-setup-self-healing-scoring`
+      // events whose results have nothing to do with the agent's iterative
+      // self-healing loop.
       const testProcess = executeCommand({
         command: 'npx',
         args: [
@@ -47,9 +54,10 @@ export async function runStoryTests(
         ],
         cwd,
         stdio: 'pipe',
-        ...(options?.ghostRun
-          ? { env: { STORYBOOK_COMPONENT_PATHS: componentFilePaths.join(';') } }
-          : {}),
+        env: {
+          STORYBOOK_INTERNAL_TEST_RUN: '1',
+          ...(options?.ghostRun ? { STORYBOOK_COMPONENT_PATHS: componentFilePaths.join(';') } : {}),
+        },
       });
 
       await testProcess;

diff --git a/code/core/src/shared/utils/analyze-test-results.test.ts b/code/core/src/shared/utils/analyze-test-results.test.ts
@@ -78,14 +78,6 @@ describe('analyze-test-results', () => {
         runUniqueErrorCount: 0,
         runCategorizedErrors: {},
         runCssCheck: 'not-run',
-        cumulativeTotal: 3,
-        cumulativePassed: 3,
-        cumulativePassedButEmptyRender: 0,
-        cumulativeSuccessRate: 1.0,
-        cumulativeSuccessRateWithoutEmptyRender: 1.0,
-        cumulativeUniqueErrorCount: 0,
-        cumulativeCategorizedErrors: {},
-        cumulativeCssCheck: 'not-run',
       });
     });
 
@@ -119,7 +111,7 @@ describe('analyze-test-results', () => {
       expect(analysis.runTotal).toBe(0);
       expect(analysis.runSuccessRate).toBe(0);
       expect(analysis.runSuccessRateWithoutEmptyRender).toBe(0);
-      expect(analysis.cumulativeTotal).toBe(0);
+      expect(analysis.cumulativeTotal).toBeUndefined();
     });
 
     it('should handle PENDING tests by not counting them as passed', () => {
@@ -134,16 +126,20 @@ describe('analyze-test-results', () => {
     });
 
     describe('cumulative stats', () => {
-      it('mirrors run stats when no cumulative results are provided', () => {
+      it('omits all cumulative fields when no cumulative results are provided', () => {
         const results: StoryTestResult[] = [
           { storyId: 's1', status: 'PASS' },
           { storyId: 's2', status: 'FAIL', error: 'oops' },
         ];
         const analysis = analyzeTestResults(results);
-        expect(analysis.cumulativeTotal).toBe(analysis.runTotal);
-        expect(analysis.cumulativePassed).toBe(analysis.runPassed);
-        expect(analysis.cumulativeSuccessRate).toBe(analysis.runSuccessRate);
-        expect(analysis.cumulativeUniqueErrorCount).toBe(analysis.runUniqueErrorCount);
+        expect(analysis.cumulativeTotal).toBeUndefined();
+        expect(analysis.cumulativePassed).toBeUndefined();
+        expect(analysis.cumulativePassedButEmptyRender).toBeUndefined();
+        expect(analysis.cumulativeSuccessRate).toBeUndefined();
+        expect(analysis.cumulativeSuccessRateWithoutEmptyRender).toBeUndefined();
+        expect(analysis.cumulativeUniqueErrorCount).toBeUndefined();
+        expect(analysis.cumulativeCategorizedErrors).toBeUndefined();
+        expect(analysis.cumulativeCssCheck).toBeUndefined();
       });
 
       it('reports cumulative stats independently when provided', () => {

diff --git a/code/core/src/shared/utils/analyze-test-results.ts b/code/core/src/shared/utils/analyze-test-results.ts
@@ -117,17 +117,17 @@ function summarizeResults(results: StoryTestResult[]): ResultSummary {
  * rates, empty render detection, and categorized errors.
  *
  * @param results Story results from the current run.
- * @param cumulativeResults Optional aggregated results across all runs (latest outcome per story).
- *   When omitted, cumulative stats mirror the run stats.
+ * @param cumulativeResults Optional aggregated results across runs (latest outcome per story).
+ *   Only the agent self-healing flow tracks history and passes this; when omitted the returned
+ *   analysis only contains `run*` fields and no `cumulative*` fields are emitted.
  */
 export function analyzeTestResults(
   results: StoryTestResult[],
   cumulativeResults?: StoryTestResult[]
 ): TestRunAnalysis {
   const run = summarizeResults(results);
-  const cumulative = cumulativeResults ? summarizeResults(cumulativeResults) : run;
 
-  return {
+  const analysis: TestRunAnalysis = {
     runTotal: run.total,
     runPassed: run.passed,
     runPassedButEmptyRender: run.passedButEmptyRender,
@@ -136,14 +136,19 @@ export function analyzeTestResults(
     runUniqueErrorCount: run.uniqueErrorCount,
     runCategorizedErrors: run.categorizedErrors,
     runCssCheck: run.cssCheck,
-
-    cumulativeTotal: cumulative.total,
-    cumulativePassed: cumulative.passed,
-    cumulativePassedButEmptyRender: cumulative.passedButEmptyRender,
-    cumulativeSuccessRate: cumulative.successRate,
-    cumulativeSuccessRateWithoutEmptyRender: cumulative.successRateWithoutEmptyRender,
-    cumulativeUniqueErrorCount: cumulative.uniqueErrorCount,
-    cumulativeCategorizedErrors: cumulative.categorizedErrors,
-    cumulativeCssCheck: cumulative.cssCheck,
   };
+
+  if (cumulativeResults) {
+    const cumulative = summarizeResults(cumulativeResults);
+    analysis.cumulativeTotal = cumulative.total;
+    analysis.cumulativePassed = cumulative.passed;
+    analysis.cumulativePassedButEmptyRender = cumulative.passedButEmptyRender;
+    analysis.cumulativeSuccessRate = cumulative.successRate;
+    analysis.cumulativeSuccessRateWithoutEmptyRender = cumulative.successRateWithoutEmptyRender;
+    analysis.cumulativeUniqueErrorCount = cumulative.uniqueErrorCount;
+    analysis.cumulativeCategorizedErrors = cumulative.categorizedErrors;
+    analysis.cumulativeCssCheck = cumulative.cssCheck;
+  }
+
+  return analysis;
 }
diff --git a/code/core/src/shared/utils/test-result-types.ts b/code/core/src/shared/utils/test-result-types.ts
@@ -61,15 +61,16 @@ export interface TestRunAnalysis {
 
   /**
    * Stats accumulated across runs: for every story we've ever seen, we
-   * keep the most recent outcome (by timestamp). When no history is
-   * available these mirror the `run*` fields.
+   * keep the most recent outcome (by timestamp). Only emitted by the
+   * agent self-healing flow, which is the only consumer that persists
+   * a per-story history in the Storybook cache.
    */
-  cumulativeTotal: number;
-  cumulativePassed: number;
-  cumulativePassedButEmptyRender: number;
-  cumulativeSuccessRate: number;
-  cumulativeSuccessRateWithoutEmptyRender: number;
-  cumulativeUniqueErrorCount: number;
-  cumulativeCategorizedErrors: Record<string, CategorizedError>;
-  cumulativeCssCheck: CssCheckOutcome;
+  cumulativeTotal?: number;
+  cumulativePassed?: number;
+  cumulativePassedButEmptyRender?: number;
+  cumulativeSuccessRate?: number;
+  cumulativeSuccessRateWithoutEmptyRender?: number;
+  cumulativeUniqueErrorCount?: number;
+  cumulativeCategorizedErrors?: Record<string, CategorizedError>;
+  cumulativeCssCheck?: CssCheckOutcome;
 }
diff --git a/code/lib/cli-storybook/src/ai/index.ts b/code/lib/cli-storybook/src/ai/index.ts
@@ -39,7 +39,7 @@ export async function aiSetup(options: AiSetupOptions): Promise<void> {
     const detectedLanguage = await projectTypeService.detectLanguage();
     const language = detectedLanguage === SupportedLanguage.TYPESCRIPT ? 'ts' : 'js';
 
-    const needsUserOnboarding = await cache.get('onboarding-pending');
+    const needsUserOnboarding = await cache.get<boolean>('onboarding-pending', false);
 
     projectInfo = {
       storybookVersion: data.versionInstalled,