Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions code/addons/vitest/src/vitest-plugin/agent-story-history-cache.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import { createFileSystemCache, resolvePathInStorybookCache } from 'storybook/internal/common';
import type { StoryTestResult, StoryTestResultHistory } from 'storybook/internal/core-server';

// Key under which the whole per-story result history is stored in the cache.
const CACHE_KEY = 'agent-self-healing-story-history';

// File-system cache used to persist the history between runs. Its base path is
// the `agent-self-healing` location resolved via resolvePathInStorybookCache.
const historyCache = createFileSystemCache({
basePath: resolvePathInStorybookCache('agent-self-healing'),
ns: 'storybook',
});

/**
 * Load the persisted per-story result history from the cache.
 *
 * @returns The stored history, or an empty object when the cache yields
 *   `null` or `undefined` for the history key.
 */
export async function readStoryHistory(): Promise<StoryTestResultHistory> {
  const stored = await historyCache.get<StoryTestResultHistory>(CACHE_KEY);
  if (stored === null || stored === undefined) {
    return {};
  }
  return stored;
}

/**
 * Merge the current run's results into the persisted history (keeping the most
 * recent result per storyId) and return the full set of stories ever observed.
 *
 * The merged history is written back through the history cache; this function
 * itself sends nothing to telemetry. Entries carry storyIds, so callers should
 * keep the returned per-story data out of telemetry payloads.
 *
 * A result from the current run always replaces the stored entry for the same
 * storyId. If `results` contains the same storyId more than once, the last
 * occurrence wins.
 *
 * @param results - Story results collected during the current run.
 * @returns Every story result in the merged history, with the internal
 *   `timestamp` bookkeeping field stripped from each entry.
 */
export async function mergeAndWriteStoryHistory(
  results: StoryTestResult[]
): Promise<StoryTestResult[]> {
  const history = await readStoryHistory();
  const now = Date.now();

  for (const result of results) {
    // The current run is by definition the newest observation, so it wins
    // unconditionally. Comparing against the stored wall-clock timestamp would
    // silently drop fresh results whenever the stored value lies in the future
    // (e.g. the system clock was moved backwards since the previous run).
    history[result.storyId] = { ...result, timestamp: now };
  }

  await historyCache.set(CACHE_KEY, history);

  // Strip the bookkeeping timestamp so callers get plain StoryTestResult objects.
  return Object.values(history).map(({ timestamp, ...rest }) => rest);
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@ vi.mock('storybook/internal/telemetry', () => ({
),
}));

// Replace the history-cache module with a pass-through mock: by default the
// "merged" history is simply the results that were handed in.
vi.mock('./agent-story-history-cache.ts', () => ({
mergeAndWriteStoryHistory: vi.fn(async (results) => results),
}));

const { telemetry } = await import('storybook/internal/telemetry');
const { mergeAndWriteStoryHistory } = await import('./agent-story-history-cache.ts');

function createMockTestCase({
storyId,
Expand Down Expand Up @@ -63,6 +68,7 @@ describe('AgentTelemetryReporter', () => {

beforeEach(() => {
vi.clearAllMocks();
vi.mocked(mergeAndWriteStoryHistory).mockImplementation(async (results) => results);
reporter = new AgentTelemetryReporter({
configDir: '.storybook',
agent: { name: 'claude' },
Expand Down Expand Up @@ -96,7 +102,7 @@ describe('AgentTelemetryReporter', () => {
});

describe('onTestRunEnd', () => {
it('should send telemetry with analysis of collected results', async () => {
it('should send telemetry with run analysis of collected results', async () => {
reporter.onInit({ config: { watch: false } } as any);

reporter.onTestCaseResult(createMockTestCase({ storyId: 's1', status: 'passed' }) as any);
Expand All @@ -122,12 +128,12 @@ describe('AgentTelemetryReporter', () => {
expect.objectContaining({
agent: { name: 'claude' },
analysis: expect.objectContaining({
total: 3,
passed: 2,
passedButEmptyRender: 1,
successRate: 0.67,
successRateWithoutEmptyRender: 0.33,
uniqueErrorCount: 1,
runTotal: 3,
runPassed: 2,
runPassedButEmptyRender: 1,
runSuccessRate: 0.67,
runSuccessRateWithoutEmptyRender: 0.33,
runUniqueErrorCount: 1,
}),
unhandledErrorCount: 0,
watch: false,
Expand All @@ -136,6 +142,66 @@ describe('AgentTelemetryReporter', () => {
);
});

// The run* fields must describe only the current run (1 failed story), while the
// cumulative* fields must describe the merged history returned by the mocked
// mergeAndWriteStoryHistory (3 stories, 2 of them passing).
it('should include cumulative stats merged from cache history', async () => {
reporter.onInit({ config: { watch: false } } as any);

// Current run: 1 failed story
reporter.onTestCaseResult(
createMockTestCase({
storyId: 'current-run',
status: 'failed',
errors: [{ message: 'boom' }],
}) as any
);

// Cumulative cache returns history with previously-passing stories on top
vi.mocked(mergeAndWriteStoryHistory).mockResolvedValueOnce([
{ storyId: 'current-run', status: 'FAIL', error: 'boom', stack: undefined },
{ storyId: 'previous-1', status: 'PASS' },
{ storyId: 'previous-2', status: 'PASS' },
]);

await reporter.onTestRunEnd(createMockTestModules({ passed: 0, failed: 1 }) as any, []);

expect(telemetry).toHaveBeenCalledWith(
'ai-setup-self-healing-scoring',
expect.objectContaining({
analysis: expect.objectContaining({
runTotal: 1,
runPassed: 0,
cumulativeTotal: 3,
cumulativePassed: 2,
}),
}),
expect.anything()
);
});

// The reporter must hand the results it collected during the run to the history
// cache; a test case reported as 'passed' is expected to arrive with status 'PASS'.
it('should pass collected results to mergeAndWriteStoryHistory', async () => {
reporter.onInit({ config: { watch: false } } as any);

reporter.onTestCaseResult(createMockTestCase({ storyId: 's1', status: 'passed' }) as any);

await reporter.onTestRunEnd(createMockTestModules({ passed: 1, failed: 0 }) as any, []);

expect(mergeAndWriteStoryHistory).toHaveBeenCalledWith([
expect.objectContaining({ storyId: 's1', status: 'PASS' }),
]);
});

// Privacy guard: story ids may be persisted locally but must not leak into the
// telemetry payload.
it('should not include storyId in the telemetry payload', async () => {
reporter.onInit({ config: { watch: false } } as any);

reporter.onTestCaseResult(
createMockTestCase({ storyId: 'my-secret-story-id', status: 'passed' }) as any
);

await reporter.onTestRunEnd(createMockTestModules({ passed: 1, failed: 0 }) as any, []);

// Serialize the second argument of the first telemetry call and search it
// for the story id as a plain substring.
const payload = JSON.stringify(vi.mocked(telemetry).mock.calls[0][1]);
expect(payload).not.toContain('my-secret-story-id');
});

it('should filter out example stories from analysis', async () => {
reporter.onInit({ config: { watch: false } } as any);

Expand All @@ -152,8 +218,8 @@ describe('AgentTelemetryReporter', () => {
'ai-setup-self-healing-scoring',
expect.objectContaining({
analysis: expect.objectContaining({
total: 1,
passed: 1,
runTotal: 1,
runPassed: 1,
}),
}),
expect.anything()
Expand Down Expand Up @@ -196,8 +262,8 @@ describe('AgentTelemetryReporter', () => {
expect(secondCall[1]).toEqual(
expect.objectContaining({
analysis: expect.objectContaining({
total: 1,
passed: 0,
runTotal: 1,
runPassed: 0,
}),
})
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import type { StoryTestResult } from 'storybook/internal/core-server';
import { isExampleStoryId, telemetry } from 'storybook/internal/telemetry';
import type { AgentInfo } from 'storybook/internal/telemetry';

import { mergeAndWriteStoryHistory } from './agent-story-history-cache.ts';

interface AgentTelemetryReporterOptions {
configDir: string;
agent: AgentInfo;
Expand Down Expand Up @@ -63,7 +65,11 @@ export class AgentTelemetryReporter implements Reporter {
testModules: readonly TestModule[],
unhandledErrors: readonly SerializedError[]
) {
const analysis = analyzeTestResults(this.testResults);
// Merge the current run into the persisted per-story history (kept on
// disk only — storyIds never enter telemetry) and use the merged set
// to compute cumulative stats across runs.
const cumulativeResults = await mergeAndWriteStoryHistory(this.testResults);
const analysis = analyzeTestResults(this.testResults, cumulativeResults);
const duration = Date.now() - this.startTime;

const testModulesErrors = testModules.flatMap((t) => t.errors());
Expand Down
6 changes: 5 additions & 1 deletion code/core/src/core-server/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,9 @@ export { runStoryTests } from './utils/ghost-stories/run-story-tests.ts';
export { getServerPort } from './utils/server-address.ts';

export { analyzeTestResults } from '../shared/utils/analyze-test-results.ts';
export type { StoryTestResult } from '../shared/utils/test-result-types.ts';
export type {
StoryTestResult,
StoryTestResultHistory,
StoryTestResultHistoryEntry,
} from '../shared/utils/test-result-types.ts';
export { toStoryTestResult } from '../shared/utils/to-story-test-result.ts';
Original file line number Diff line number Diff line change
Expand Up @@ -213,14 +213,22 @@ describe('ghostStoriesChannel', () => {
testRunDuration: expect.any(Number),
},
results: {
total: 2,
passed: 2,
successRate: 1,
successRateWithoutEmptyRender: 1,
categorizedErrors: expect.any(Object),
cssCheck: 'not-run',
uniqueErrorCount: 0,
passedButEmptyRender: 0,
runTotal: 2,
runPassed: 2,
runSuccessRate: 1,
runSuccessRateWithoutEmptyRender: 1,
runCategorizedErrors: expect.any(Object),
runCssCheck: 'not-run',
runUniqueErrorCount: 0,
runPassedButEmptyRender: 0,
cumulativeTotal: 2,
cumulativePassed: 2,
cumulativeSuccessRate: 1,
cumulativeSuccessRateWithoutEmptyRender: 1,
cumulativeCategorizedErrors: expect.any(Object),
cumulativeCssCheck: 'not-run',
cumulativeUniqueErrorCount: 0,
cumulativePassedButEmptyRender: 0,
},
});
});
Expand Down Expand Up @@ -312,14 +320,14 @@ describe('ghostStoriesChannel', () => {
testRunDuration: expect.any(Number),
},
results: expect.objectContaining({
total: 2,
passed: 0,
successRate: 0,
// categorizedErrors is now an object with categories as keys
categorizedErrors: expect.any(Object),
cssCheck: 'not-run',
uniqueErrorCount: expect.any(Number),
passedButEmptyRender: 0,
runTotal: 2,
runPassed: 0,
runSuccessRate: 0,
// runCategorizedErrors is an object keyed by error category
runCategorizedErrors: expect.any(Object),
runCssCheck: 'not-run',
runUniqueErrorCount: expect.any(Number),
runPassedButEmptyRender: 0,
}),
})
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,22 @@ describe('parse-vitest-report', () => {
const result = parseVitestResults(mockVitestResults);

expect(result.summary).toEqual({
total: 3,
passed: 3,
passedButEmptyRender: 0,
successRate: 1.0,
successRateWithoutEmptyRender: 1.0,
uniqueErrorCount: 0,
categorizedErrors: {},
cssCheck: 'not-run',
runTotal: 3,
runPassed: 3,
runPassedButEmptyRender: 0,
runSuccessRate: 1.0,
runSuccessRateWithoutEmptyRender: 1.0,
runUniqueErrorCount: 0,
runCategorizedErrors: {},
runCssCheck: 'not-run',
cumulativeTotal: 3,
cumulativePassed: 3,
cumulativePassedButEmptyRender: 0,
cumulativeSuccessRate: 1.0,
cumulativeSuccessRateWithoutEmptyRender: 1.0,
cumulativeUniqueErrorCount: 0,
cumulativeCategorizedErrors: {},
cumulativeCssCheck: 'not-run',
});
});

Expand Down Expand Up @@ -86,10 +94,10 @@ describe('parse-vitest-report', () => {

const result = parseVitestResults(mockVitestResults);

expect(result.summary?.total).toBe(3);
expect(result.summary?.passed).toBe(1);
expect(result.summary?.successRate).toBe(0.33);
expect(result.summary?.uniqueErrorCount).toBe(2);
expect(result.summary?.runTotal).toBe(3);
expect(result.summary?.runPassed).toBe(1);
expect(result.summary?.runSuccessRate).toBe(0.33);
expect(result.summary?.runUniqueErrorCount).toBe(2);
});

it('should categorize errors and include them in the summary', () => {
Expand Down Expand Up @@ -137,10 +145,10 @@ describe('parse-vitest-report', () => {

const result = parseVitestResults(mockVitestResults);

expect(result.summary?.total).toBe(5);
expect(result.summary?.passed).toBe(1);
expect(result.summary?.uniqueErrorCount).toBe(3);
expect(result.summary?.categorizedErrors).toEqual({
expect(result.summary?.runTotal).toBe(5);
expect(result.summary?.runPassed).toBe(1);
expect(result.summary?.runUniqueErrorCount).toBe(3);
expect(result.summary?.runCategorizedErrors).toEqual({
HOOK_USAGE_ERROR: {
uniqueCount: 1,
count: 1,
Expand Down Expand Up @@ -199,9 +207,9 @@ describe('parse-vitest-report', () => {

const result = parseVitestResults(mockVitestResults);

expect(result.summary?.passedButEmptyRender).toBe(2);
expect(result.summary?.successRate).toBe(1.0);
expect(result.summary?.successRateWithoutEmptyRender).toBe(0.33);
expect(result.summary?.runPassedButEmptyRender).toBe(2);
expect(result.summary?.runSuccessRate).toBe(1.0);
expect(result.summary?.runSuccessRateWithoutEmptyRender).toBe(0.33);
});

it('should handle multiple test suites', () => {
Expand Down Expand Up @@ -244,8 +252,8 @@ describe('parse-vitest-report', () => {

const result = parseVitestResults(mockVitestResults);

expect(result.summary?.total).toBe(4);
expect(result.summary?.passed).toBe(3);
expect(result.summary?.runTotal).toBe(4);
expect(result.summary?.runPassed).toBe(3);
});

it('should handle zero total tests', () => {
Expand All @@ -259,11 +267,11 @@ describe('parse-vitest-report', () => {

const result = parseVitestResults(mockVitestResults);

expect(result.summary?.total).toBe(0);
expect(result.summary?.successRate).toBe(0);
expect(result.summary?.runTotal).toBe(0);
expect(result.summary?.runSuccessRate).toBe(0);
});

it('surfaces the CssCheck story outcome via summary.cssCheck', () => {
it('surfaces the CssCheck story outcome via summary.runCssCheck', () => {
const mockVitestResults = {
success: false,
numTotalTests: 2,
Expand Down Expand Up @@ -291,7 +299,7 @@ describe('parse-vitest-report', () => {

const result = parseVitestResults(mockVitestResults);

expect(result.summary?.cssCheck).toBe('fail');
expect(result.summary?.runCssCheck).toBe('fail');
});
});
});
Loading
Loading