Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions code/addons/vitest/src/vitest-plugin/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -464,8 +464,15 @@ export const storybookTest = async (options?: UserOptions): Promise<Plugin[]> =>

if (isTelemetryModuleEnabled()) {
// When an agent is running vitest via CLI, inject a reporter that sends
// detailed test result telemetry (pass/fail, error analysis, empty renders)
if (agent && withinAgenticSetupSession) {
// detailed test result telemetry (pass/fail, error analysis, empty renders).
//
// STORYBOOK_INTERNAL_TEST_RUN is set by the dev server when it spawns
// vitest internally (ghost-stories, ai-setup-final-scoring). Those runs
// are not part of the agent's iterative self-healing loop, so we skip
// installing the reporter to avoid emitting `ai-setup-self-healing-scoring`
// events whose results would misleadingly attribute ghost-stories /
// final-scoring outcomes to the self-healing loop.
if (agent && withinAgenticSetupSession && !process.env.STORYBOOK_INTERNAL_TEST_RUN) {
context.vitest.config.reporters.push(
new AgentTelemetryReporter({
configDir: finalOptions.configDir,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ describe('ghostStoriesChannel', () => {
],
stdio: 'pipe',
env: {
STORYBOOK_INTERNAL_TEST_RUN: '1',
STORYBOOK_COMPONENT_PATHS: 'component1.tsx;component2.tsx',
},
} as any);
Expand All @@ -221,14 +222,6 @@ describe('ghostStoriesChannel', () => {
runCssCheck: 'not-run',
runUniqueErrorCount: 0,
runPassedButEmptyRender: 0,
cumulativeTotal: 2,
cumulativePassed: 2,
cumulativeSuccessRate: 1,
cumulativeSuccessRateWithoutEmptyRender: 1,
cumulativeCategorizedErrors: expect.any(Object),
cumulativeCssCheck: 'not-run',
cumulativeUniqueErrorCount: 0,
cumulativePassedButEmptyRender: 0,
},
});
});
Expand Down Expand Up @@ -303,6 +296,7 @@ describe('ghostStoriesChannel', () => {
],
stdio: 'pipe',
env: {
STORYBOOK_INTERNAL_TEST_RUN: '1',
STORYBOOK_COMPONENT_PATHS: 'component1.tsx;component2.tsx',
},
} as any);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,6 @@ describe('parse-vitest-report', () => {
runUniqueErrorCount: 0,
runCategorizedErrors: {},
runCssCheck: 'not-run',
cumulativeTotal: 3,
cumulativePassed: 3,
cumulativePassedButEmptyRender: 0,
cumulativeSuccessRate: 1.0,
cumulativeSuccessRateWithoutEmptyRender: 1.0,
cumulativeUniqueErrorCount: 0,
cumulativeCategorizedErrors: {},
cumulativeCssCheck: 'not-run',
});
});

Expand Down
16 changes: 12 additions & 4 deletions code/core/src/core-server/utils/ghost-stories/run-story-tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,14 @@ export async function runStoryTests(
let testFailureMessage;

try {
// Execute the test runner command with specific story files
// Execute the test runner command with specific story files.
//
// STORYBOOK_INTERNAL_TEST_RUN marks this as a dev-server-initiated run
// (ghost-stories or ai-setup-final-scoring) so the vitest plugin can
// skip telemetry meant for agent-driven external runs. Without this,
// ghost-stories runs would trigger `ai-setup-self-healing-scoring`
// events whose results have nothing to do with the agent's iterative
// self-healing loop.
const testProcess = executeCommand({
command: 'npx',
args: [
Expand All @@ -47,9 +54,10 @@ export async function runStoryTests(
],
cwd,
stdio: 'pipe',
...(options?.ghostRun
? { env: { STORYBOOK_COMPONENT_PATHS: componentFilePaths.join(';') } }
: {}),
env: {
STORYBOOK_INTERNAL_TEST_RUN: '1',
...(options?.ghostRun ? { STORYBOOK_COMPONENT_PATHS: componentFilePaths.join(';') } : {}),
},
});

await testProcess;
Expand Down
22 changes: 8 additions & 14 deletions code/core/src/shared/utils/analyze-test-results.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,6 @@ describe('analyze-test-results', () => {
runUniqueErrorCount: 0,
runCategorizedErrors: {},
runCssCheck: 'not-run',
cumulativeTotal: 3,
cumulativePassed: 3,
cumulativePassedButEmptyRender: 0,
cumulativeSuccessRate: 1.0,
cumulativeSuccessRateWithoutEmptyRender: 1.0,
cumulativeUniqueErrorCount: 0,
cumulativeCategorizedErrors: {},
cumulativeCssCheck: 'not-run',
});
});

Expand Down Expand Up @@ -119,7 +111,7 @@ describe('analyze-test-results', () => {
expect(analysis.runTotal).toBe(0);
expect(analysis.runSuccessRate).toBe(0);
expect(analysis.runSuccessRateWithoutEmptyRender).toBe(0);
expect(analysis.cumulativeTotal).toBe(0);
expect(analysis.cumulativeTotal).toBeUndefined();
});

it('should handle PENDING tests by not counting them as passed', () => {
Expand All @@ -134,16 +126,18 @@ describe('analyze-test-results', () => {
});

describe('cumulative stats', () => {
it('mirrors run stats when no cumulative results are provided', () => {
it('omits all cumulative fields when no cumulative results are provided', () => {
const results: StoryTestResult[] = [
{ storyId: 's1', status: 'PASS' },
{ storyId: 's2', status: 'FAIL', error: 'oops' },
];
const analysis = analyzeTestResults(results);
expect(analysis.cumulativeTotal).toBe(analysis.runTotal);
expect(analysis.cumulativePassed).toBe(analysis.runPassed);
expect(analysis.cumulativeSuccessRate).toBe(analysis.runSuccessRate);
expect(analysis.cumulativeUniqueErrorCount).toBe(analysis.runUniqueErrorCount);
expect(analysis.cumulativeTotal).toBeUndefined();
expect(analysis.cumulativePassed).toBeUndefined();
expect(analysis.cumulativeSuccessRate).toBeUndefined();
expect(analysis.cumulativeUniqueErrorCount).toBeUndefined();
expect(analysis.cumulativeCategorizedErrors).toBeUndefined();
expect(analysis.cumulativeCssCheck).toBeUndefined();
Comment on lines +129 to +140

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

“All cumulative fields” test misses two cumulative properties.

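The test title at line 129 claims that all cumulative fields are omitted, but the test never asserts cumulativePassedButEmptyRender or cumulativeSuccessRateWithoutEmptyRender, so a regression that still emitted either field would go unnoticed.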

Suggested patch
       expect(analysis.cumulativeTotal).toBeUndefined();
       expect(analysis.cumulativePassed).toBeUndefined();
+      expect(analysis.cumulativePassedButEmptyRender).toBeUndefined();
       expect(analysis.cumulativeSuccessRate).toBeUndefined();
+      expect(analysis.cumulativeSuccessRateWithoutEmptyRender).toBeUndefined();
       expect(analysis.cumulativeUniqueErrorCount).toBeUndefined();
       expect(analysis.cumulativeCategorizedErrors).toBeUndefined();
       expect(analysis.cumulativeCssCheck).toBeUndefined();
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
it('omits all cumulative fields when no cumulative results are provided', () => {
const results: StoryTestResult[] = [
{ storyId: 's1', status: 'PASS' },
{ storyId: 's2', status: 'FAIL', error: 'oops' },
];
const analysis = analyzeTestResults(results);
expect(analysis.cumulativeTotal).toBe(analysis.runTotal);
expect(analysis.cumulativePassed).toBe(analysis.runPassed);
expect(analysis.cumulativeSuccessRate).toBe(analysis.runSuccessRate);
expect(analysis.cumulativeUniqueErrorCount).toBe(analysis.runUniqueErrorCount);
expect(analysis.cumulativeTotal).toBeUndefined();
expect(analysis.cumulativePassed).toBeUndefined();
expect(analysis.cumulativeSuccessRate).toBeUndefined();
expect(analysis.cumulativeUniqueErrorCount).toBeUndefined();
expect(analysis.cumulativeCategorizedErrors).toBeUndefined();
expect(analysis.cumulativeCssCheck).toBeUndefined();
it('omits all cumulative fields when no cumulative results are provided', () => {
const results: StoryTestResult[] = [
{ storyId: 's1', status: 'PASS' },
{ storyId: 's2', status: 'FAIL', error: 'oops' },
];
const analysis = analyzeTestResults(results);
expect(analysis.cumulativeTotal).toBeUndefined();
expect(analysis.cumulativePassed).toBeUndefined();
expect(analysis.cumulativePassedButEmptyRender).toBeUndefined();
expect(analysis.cumulativeSuccessRate).toBeUndefined();
expect(analysis.cumulativeSuccessRateWithoutEmptyRender).toBeUndefined();
expect(analysis.cumulativeUniqueErrorCount).toBeUndefined();
expect(analysis.cumulativeCategorizedErrors).toBeUndefined();
expect(analysis.cumulativeCssCheck).toBeUndefined();
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@code/core/src/shared/utils/analyze-test-results.test.ts` around lines 129-140,
the analyzeTestResults test titled "omits all cumulative fields when no
cumulative results are provided" is missing assertions for
cumulativePassedButEmptyRender and cumulativeSuccessRateWithoutEmptyRender.
Update the test that calls analyzeTestResults(results) to also assert that
analysis.cumulativePassedButEmptyRender and
analysis.cumulativeSuccessRateWithoutEmptyRender are undefined, so that every
cumulative property returned by analyzeTestResults is covered.

});

it('reports cumulative stats independently when provided', () => {
Expand Down
31 changes: 18 additions & 13 deletions code/core/src/shared/utils/analyze-test-results.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,17 +117,17 @@ function summarizeResults(results: StoryTestResult[]): ResultSummary {
* rates, empty render detection, and categorized errors.
*
* @param results Story results from the current run.
* @param cumulativeResults Optional aggregated results across all runs (latest outcome per story).
* When omitted, cumulative stats mirror the run stats.
* @param cumulativeResults Optional aggregated results across runs (latest outcome per story).
* Only the agent self-healing flow tracks history and passes this; when omitted the returned
* analysis only contains `run*` fields and no `cumulative*` fields are emitted.
*/
export function analyzeTestResults(
results: StoryTestResult[],
cumulativeResults?: StoryTestResult[]
): TestRunAnalysis {
const run = summarizeResults(results);
const cumulative = cumulativeResults ? summarizeResults(cumulativeResults) : run;

return {
const analysis: TestRunAnalysis = {
runTotal: run.total,
runPassed: run.passed,
runPassedButEmptyRender: run.passedButEmptyRender,
Expand All @@ -136,14 +136,19 @@ export function analyzeTestResults(
runUniqueErrorCount: run.uniqueErrorCount,
runCategorizedErrors: run.categorizedErrors,
runCssCheck: run.cssCheck,

cumulativeTotal: cumulative.total,
cumulativePassed: cumulative.passed,
cumulativePassedButEmptyRender: cumulative.passedButEmptyRender,
cumulativeSuccessRate: cumulative.successRate,
cumulativeSuccessRateWithoutEmptyRender: cumulative.successRateWithoutEmptyRender,
cumulativeUniqueErrorCount: cumulative.uniqueErrorCount,
cumulativeCategorizedErrors: cumulative.categorizedErrors,
cumulativeCssCheck: cumulative.cssCheck,
};

if (cumulativeResults) {
const cumulative = summarizeResults(cumulativeResults);
analysis.cumulativeTotal = cumulative.total;
analysis.cumulativePassed = cumulative.passed;
analysis.cumulativePassedButEmptyRender = cumulative.passedButEmptyRender;
analysis.cumulativeSuccessRate = cumulative.successRate;
analysis.cumulativeSuccessRateWithoutEmptyRender = cumulative.successRateWithoutEmptyRender;
analysis.cumulativeUniqueErrorCount = cumulative.uniqueErrorCount;
analysis.cumulativeCategorizedErrors = cumulative.categorizedErrors;
analysis.cumulativeCssCheck = cumulative.cssCheck;
}

return analysis;
}
21 changes: 11 additions & 10 deletions code/core/src/shared/utils/test-result-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,16 @@ export interface TestRunAnalysis {

/**
* Stats accumulated across runs: for every story we've ever seen, we
* keep the most recent outcome (by timestamp). When no history is
* available these mirror the `run*` fields.
* keep the most recent outcome (by timestamp). Only emitted by the
* agent self-healing flow, which is the only consumer that persists
* a per-story history in the Storybook cache.
*/
cumulativeTotal: number;
cumulativePassed: number;
cumulativePassedButEmptyRender: number;
cumulativeSuccessRate: number;
cumulativeSuccessRateWithoutEmptyRender: number;
cumulativeUniqueErrorCount: number;
cumulativeCategorizedErrors: Record<string, CategorizedError>;
cumulativeCssCheck: CssCheckOutcome;
cumulativeTotal?: number;
cumulativePassed?: number;
cumulativePassedButEmptyRender?: number;
cumulativeSuccessRate?: number;
cumulativeSuccessRateWithoutEmptyRender?: number;
cumulativeUniqueErrorCount?: number;
cumulativeCategorizedErrors?: Record<string, CategorizedError>;
cumulativeCssCheck?: CssCheckOutcome;
}
2 changes: 1 addition & 1 deletion code/lib/cli-storybook/src/ai/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ export async function aiSetup(options: AiSetupOptions): Promise<void> {
const detectedLanguage = await projectTypeService.detectLanguage();
const language = detectedLanguage === SupportedLanguage.TYPESCRIPT ? 'ts' : 'js';

const needsUserOnboarding = await cache.get('onboarding-pending');
const needsUserOnboarding = await cache.get<boolean>('onboarding-pending', false);

projectInfo = {
storybookVersion: data.versionInstalled,
Expand Down
Loading