storybookjs · Sidnioulz · May 4, 2026 · May 4, 2026 · May 4, 2026
diff --git a/code/addons/vitest/src/vitest-plugin/agent-story-history-cache.ts b/code/addons/vitest/src/vitest-plugin/agent-story-history-cache.ts
@@ -0,0 +1,37 @@
+import { createFileSystemCache, resolvePathInStorybookCache } from 'storybook/internal/common';
+import type { StoryTestResult, StoryTestResultHistory } from 'storybook/internal/core-server';
+
+const CACHE_KEY = 'agent-self-healing-story-history'; // key of the single history entry
+
+const historyCache = createFileSystemCache({
+  basePath: resolvePathInStorybookCache('agent-self-healing'),
+  ns: 'storybook',
+}); // backs readStoryHistory and mergeAndWriteStoryHistory
+
+export async function readStoryHistory(): Promise<StoryTestResultHistory> {
+  return (await historyCache.get<StoryTestResultHistory>(CACHE_KEY)) ?? {}; // nullish -> {}
+}
+
+/**
+ * Merge the current run's results into the persisted per-story history and
+ * return every story ever observed, with the bookkeeping timestamp removed.
+ *
+ * Results from the current run always replace the stored entry for the same
+ * storyId: they are the most recent observation by definition, so a stored
+ * timestamp that lies in the future (clock skew or rollback) must not keep a
+ * stale status alive. The history lives only on disk in the Storybook cache —
+ * its entries (which include storyIds) are never sent in telemetry.
+ * @param results Story test results collected during the current run.
+ * @returns The merged results (history plus current run), one per storyId.
+ */
+export async function mergeAndWriteStoryHistory(
+  results: StoryTestResult[]
+): Promise<StoryTestResult[]> {
+  const history = await readStoryHistory();
+  const now = Date.now();
+  for (const result of results) {
+    history[result.storyId] = { ...result, timestamp: now };
+  }
+  await historyCache.set(CACHE_KEY, history);
+  return Object.values(history).map(({ timestamp, ...rest }) => rest);
+}
diff --git a/code/addons/vitest/src/vitest-plugin/agent-telemetry-reporter.test.ts b/code/addons/vitest/src/vitest-plugin/agent-telemetry-reporter.test.ts
@@ -12,7 +12,12 @@ vi.mock('storybook/internal/telemetry', () => ({
   ),
 }));
 
+vi.mock('./agent-story-history-cache.ts', () => ({
+  mergeAndWriteStoryHistory: vi.fn(async (results) => results), // echo the run's results back
+}));
+
 const { telemetry } = await import('storybook/internal/telemetry');
+const { mergeAndWriteStoryHistory } = await import('./agent-story-history-cache.ts');
 
 function createMockTestCase({
   storyId,
@@ -63,6 +68,7 @@ describe('AgentTelemetryReporter', () => {
 
   beforeEach(() => {
     vi.clearAllMocks();
+    vi.mocked(mergeAndWriteStoryHistory).mockImplementation(async (results) => results);
     reporter = new AgentTelemetryReporter({
       configDir: '.storybook',
       agent: { name: 'claude' },
@@ -96,7 +102,7 @@ describe('AgentTelemetryReporter', () => {
   });
 
   describe('onTestRunEnd', () => {
-    it('should send telemetry with analysis of collected results', async () => {
+    it('should send telemetry with run analysis of collected results', async () => {
       reporter.onInit({ config: { watch: false } } as any);
 
       reporter.onTestCaseResult(createMockTestCase({ storyId: 's1', status: 'passed' }) as any);
@@ -122,12 +128,12 @@ describe('AgentTelemetryReporter', () => {
         expect.objectContaining({
           agent: { name: 'claude' },
           analysis: expect.objectContaining({
-            total: 3,
-            passed: 2,
-            passedButEmptyRender: 1,
-            successRate: 0.67,
-            successRateWithoutEmptyRender: 0.33,
-            uniqueErrorCount: 1,
+            runTotal: 3,
+            runPassed: 2,
+            runPassedButEmptyRender: 1,
+            runSuccessRate: 0.67,
+            runSuccessRateWithoutEmptyRender: 0.33,
+            runUniqueErrorCount: 1,
           }),
           unhandledErrorCount: 0,
           watch: false,
@@ -136,6 +142,66 @@ describe('AgentTelemetryReporter', () => {
       );
     });
 
+    it('should include cumulative stats merged from cache history', async () => {
+      reporter.onInit({ config: { watch: false } } as any);
+
+      // Current run: a single story, and it fails
+      reporter.onTestCaseResult(
+        createMockTestCase({
+          storyId: 'current-run',
+          status: 'failed',
+          errors: [{ message: 'boom' }],
+        }) as any
+      );
+
+      // Merged history: the current failure plus two stories that passed in earlier runs
+      vi.mocked(mergeAndWriteStoryHistory).mockResolvedValueOnce([
+        { storyId: 'current-run', status: 'FAIL', error: 'boom', stack: undefined },
+        { storyId: 'previous-1', status: 'PASS' },
+        { storyId: 'previous-2', status: 'PASS' },
+      ]);
+
+      await reporter.onTestRunEnd(createMockTestModules({ passed: 0, failed: 1 }) as any, []);
+
+      expect(telemetry).toHaveBeenCalledWith(
+        'ai-setup-self-healing-scoring',
+        expect.objectContaining({
+          analysis: expect.objectContaining({
+            runTotal: 1,
+            runPassed: 0,
+            cumulativeTotal: 3,
+            cumulativePassed: 2,
+          }),
+        }),
+        expect.anything()
+      );
+    });
+
+    it('should pass collected results to mergeAndWriteStoryHistory', async () => {
+      reporter.onInit({ config: { watch: false } } as any);
+
+      reporter.onTestCaseResult(createMockTestCase({ storyId: 's1', status: 'passed' }) as any);
+
+      await reporter.onTestRunEnd(createMockTestModules({ passed: 1, failed: 0 }) as any, []);
+
+      expect(mergeAndWriteStoryHistory).toHaveBeenCalledWith([
+        expect.objectContaining({ storyId: 's1', status: 'PASS' }), // the one collected result
+      ]);
+    });
+
+    it('should not include storyId in the telemetry payload', async () => {
+      reporter.onInit({ config: { watch: false } } as any);
+
+      reporter.onTestCaseResult(
+        createMockTestCase({ storyId: 'my-secret-story-id', status: 'passed' }) as any
+      );
+
+      await reporter.onTestRunEnd(createMockTestModules({ passed: 1, failed: 0 }) as any, []);
+
+      const payload = JSON.stringify(vi.mocked(telemetry).mock.calls[0][1]);
+      expect(payload).not.toContain('my-secret-story-id'); // checks the serialized payload
+    });
+
     it('should filter out example stories from analysis', async () => {
       reporter.onInit({ config: { watch: false } } as any);
 
@@ -152,8 +218,8 @@ describe('AgentTelemetryReporter', () => {
         'ai-setup-self-healing-scoring',
         expect.objectContaining({
           analysis: expect.objectContaining({
-            total: 1,
-            passed: 1,
+            runTotal: 1,
+            runPassed: 1,
           }),
         }),
         expect.anything()
@@ -196,8 +262,8 @@ describe('AgentTelemetryReporter', () => {
       expect(secondCall[1]).toEqual(
         expect.objectContaining({
           analysis: expect.objectContaining({
-            total: 1,
-            passed: 0,
+            runTotal: 1,
+            runPassed: 0,
           }),
         })
       );

diff --git a/code/addons/vitest/src/vitest-plugin/agent-telemetry-reporter.ts b/code/addons/vitest/src/vitest-plugin/agent-telemetry-reporter.ts
@@ -9,6 +9,8 @@ import type { StoryTestResult } from 'storybook/internal/core-server';
 import { isExampleStoryId, telemetry } from 'storybook/internal/telemetry';
 import type { AgentInfo } from 'storybook/internal/telemetry';
 
+import { mergeAndWriteStoryHistory } from './agent-story-history-cache.ts';
+
 interface AgentTelemetryReporterOptions {
   configDir: string;
   agent: AgentInfo;
@@ -63,7 +65,11 @@ export class AgentTelemetryReporter implements Reporter {
     testModules: readonly TestModule[],
     unhandledErrors: readonly SerializedError[]
   ) {
-    const analysis = analyzeTestResults(this.testResults);
+    // Merge the current run into the persisted per-story history (kept on
+    // disk only — storyIds never enter telemetry) and use the merged set
+    // to compute cumulative stats across runs.
+    const cumulativeResults = await mergeAndWriteStoryHistory(this.testResults);
+    const analysis = analyzeTestResults(this.testResults, cumulativeResults);
     const duration = Date.now() - this.startTime;
 
     const testModulesErrors = testModules.flatMap((t) => t.errors());

diff --git a/code/core/src/core-server/index.ts b/code/core/src/core-server/index.ts
@@ -41,5 +41,9 @@ export { runStoryTests } from './utils/ghost-stories/run-story-tests.ts';
 export { getServerPort } from './utils/server-address.ts';
 
 export { analyzeTestResults } from '../shared/utils/analyze-test-results.ts';
-export type { StoryTestResult } from '../shared/utils/test-result-types.ts';
+export type {
+  StoryTestResult,
+  StoryTestResultHistory,
+  StoryTestResultHistoryEntry,
+} from '../shared/utils/test-result-types.ts';
 export { toStoryTestResult } from '../shared/utils/to-story-test-result.ts';
diff --git a/code/core/src/core-server/server-channel/ghost-stories-channel.test.ts b/code/core/src/core-server/server-channel/ghost-stories-channel.test.ts
@@ -213,14 +213,22 @@ describe('ghostStoriesChannel', () => {
           testRunDuration: expect.any(Number),
         },
         results: {
-          total: 2,
-          passed: 2,
-          successRate: 1,
-          successRateWithoutEmptyRender: 1,
-          categorizedErrors: expect.any(Object),
-          cssCheck: 'not-run',
-          uniqueErrorCount: 0,
-          passedButEmptyRender: 0,
+          runTotal: 2,
+          runPassed: 2,
+          runSuccessRate: 1,
+          runSuccessRateWithoutEmptyRender: 1,
+          runCategorizedErrors: expect.any(Object),
+          runCssCheck: 'not-run',
+          runUniqueErrorCount: 0,
+          runPassedButEmptyRender: 0,
+          cumulativeTotal: 2,
+          cumulativePassed: 2,
+          cumulativeSuccessRate: 1,
+          cumulativeSuccessRateWithoutEmptyRender: 1,
+          cumulativeCategorizedErrors: expect.any(Object),
+          cumulativeCssCheck: 'not-run',
+          cumulativeUniqueErrorCount: 0,
+          cumulativePassedButEmptyRender: 0,
         },
       });
     });
@@ -312,14 +320,14 @@ describe('ghostStoriesChannel', () => {
             testRunDuration: expect.any(Number),
           },
           results: expect.objectContaining({
-            total: 2,
-            passed: 0,
-            successRate: 0,
-            // categorizedErrors is now an object with categories as keys
-            categorizedErrors: expect.any(Object),
-            cssCheck: 'not-run',
-            uniqueErrorCount: expect.any(Number),
-            passedButEmptyRender: 0,
+            runTotal: 2,
+            runPassed: 0,
+            runSuccessRate: 0,
+            // runCategorizedErrors is an object keyed by error category
+            runCategorizedErrors: expect.any(Object),
+            runCssCheck: 'not-run',
+            runUniqueErrorCount: expect.any(Number),
+            runPassedButEmptyRender: 0,
           }),
         })
       );

diff --git a/code/core/src/core-server/utils/ghost-stories/parse-vitest-report.test.ts b/code/core/src/core-server/utils/ghost-stories/parse-vitest-report.test.ts
@@ -42,14 +42,22 @@ describe('parse-vitest-report', () => {
       const result = parseVitestResults(mockVitestResults);
 
       expect(result.summary).toEqual({
-        total: 3,
-        passed: 3,
-        passedButEmptyRender: 0,
-        successRate: 1.0,
-        successRateWithoutEmptyRender: 1.0,
-        uniqueErrorCount: 0,
-        categorizedErrors: {},
-        cssCheck: 'not-run',
+        runTotal: 3,
+        runPassed: 3,
+        runPassedButEmptyRender: 0,
+        runSuccessRate: 1.0,
+        runSuccessRateWithoutEmptyRender: 1.0,
+        runUniqueErrorCount: 0,
+        runCategorizedErrors: {},
+        runCssCheck: 'not-run',
+        cumulativeTotal: 3,
+        cumulativePassed: 3,
+        cumulativePassedButEmptyRender: 0,
+        cumulativeSuccessRate: 1.0,
+        cumulativeSuccessRateWithoutEmptyRender: 1.0,
+        cumulativeUniqueErrorCount: 0,
+        cumulativeCategorizedErrors: {},
+        cumulativeCssCheck: 'not-run',
       });
     });
 
@@ -86,10 +94,10 @@ describe('parse-vitest-report', () => {
 
       const result = parseVitestResults(mockVitestResults);
 
-      expect(result.summary?.total).toBe(3);
-      expect(result.summary?.passed).toBe(1);
-      expect(result.summary?.successRate).toBe(0.33);
-      expect(result.summary?.uniqueErrorCount).toBe(2);
+      expect(result.summary?.runTotal).toBe(3);
+      expect(result.summary?.runPassed).toBe(1);
+      expect(result.summary?.runSuccessRate).toBe(0.33);
+      expect(result.summary?.runUniqueErrorCount).toBe(2);
     });
 
     it('should categorize errors and include them in the summary', () => {
@@ -137,10 +145,10 @@ describe('parse-vitest-report', () => {
 
       const result = parseVitestResults(mockVitestResults);
 
-      expect(result.summary?.total).toBe(5);
-      expect(result.summary?.passed).toBe(1);
-      expect(result.summary?.uniqueErrorCount).toBe(3);
-      expect(result.summary?.categorizedErrors).toEqual({
+      expect(result.summary?.runTotal).toBe(5);
+      expect(result.summary?.runPassed).toBe(1);
+      expect(result.summary?.runUniqueErrorCount).toBe(3);
+      expect(result.summary?.runCategorizedErrors).toEqual({
         HOOK_USAGE_ERROR: {
           uniqueCount: 1,
           count: 1,
@@ -199,9 +207,9 @@ describe('parse-vitest-report', () => {
 
       const result = parseVitestResults(mockVitestResults);
 
-      expect(result.summary?.passedButEmptyRender).toBe(2);
-      expect(result.summary?.successRate).toBe(1.0);
-      expect(result.summary?.successRateWithoutEmptyRender).toBe(0.33);
+      expect(result.summary?.runPassedButEmptyRender).toBe(2);
+      expect(result.summary?.runSuccessRate).toBe(1.0);
+      expect(result.summary?.runSuccessRateWithoutEmptyRender).toBe(0.33);
     });
 
     it('should handle multiple test suites', () => {
@@ -244,8 +252,8 @@ describe('parse-vitest-report', () => {
 
       const result = parseVitestResults(mockVitestResults);
 
-      expect(result.summary?.total).toBe(4);
-      expect(result.summary?.passed).toBe(3);
+      expect(result.summary?.runTotal).toBe(4);
+      expect(result.summary?.runPassed).toBe(3);
     });
 
     it('should handle zero total tests', () => {
@@ -259,11 +267,11 @@ describe('parse-vitest-report', () => {
 
       const result = parseVitestResults(mockVitestResults);
 
-      expect(result.summary?.total).toBe(0);
-      expect(result.summary?.successRate).toBe(0);
+      expect(result.summary?.runTotal).toBe(0);
+      expect(result.summary?.runSuccessRate).toBe(0);
     });
 
-    it('surfaces the CssCheck story outcome via summary.cssCheck', () => {
+    it('surfaces the CssCheck story outcome via summary.runCssCheck', () => {
       const mockVitestResults = {
         success: false,
         numTotalTests: 2,
@@ -291,7 +299,7 @@ describe('parse-vitest-report', () => {
 
       const result = parseVitestResults(mockVitestResults);
 
-      expect(result.summary?.cssCheck).toBe('fail');
+      expect(result.summary?.runCssCheck).toBe('fail');
     });
   });
 });