diff --git a/code/lib/cli-storybook/src/ai/index.test.ts b/code/lib/cli-storybook/src/ai/index.test.ts new file mode 100644 index 000000000000..d483b5833cef --- /dev/null +++ b/code/lib/cli-storybook/src/ai/index.test.ts @@ -0,0 +1,96 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('storybook/internal/common', async () => { + const actual = await vi.importActual( + 'storybook/internal/common' + ); + return { + ...actual, + cache: { set: vi.fn(), get: vi.fn(), remove: vi.fn() }, + }; +}); + +vi.mock('storybook/internal/telemetry', () => ({ + telemetry: vi.fn(), + getSessionId: vi.fn().mockResolvedValue('session-xyz'), + snapshotPreviewFile: vi + .fn() + .mockResolvedValue({ previewPath: '/proj/.storybook/preview.ts', previewHash: 'abc' }), + isTelemetryModuleEnabled: vi.fn(() => true), +})); + +vi.mock('storybook/internal/node-logger', () => ({ + logger: { log: vi.fn(), error: vi.fn(), warn: vi.fn(), debug: vi.fn() }, +})); + +vi.mock('../../../create-storybook/src/services/ProjectTypeService.ts', () => ({ + ProjectTypeService: class { + async detectLanguage() { + return 'ts'; + } + }, +})); + +vi.mock('../automigrate/helpers/mainConfigFile.ts', () => ({ + getStorybookData: vi.fn().mockResolvedValue({ + versionInstalled: '10.4.0', + frameworkPackage: '@storybook/react-vite', + rendererPackage: '@storybook/react', + renderer: 'react', + builderPackage: '@storybook/builder-vite', + addons: [], + configDir: '/proj/.storybook', + storiesPaths: [], + hasCsfFactoryPreview: false, + packageManager: {}, + }), +})); + +import { cache } from 'storybook/internal/common'; +import { + isTelemetryModuleEnabled, + snapshotPreviewFile, + telemetry, +} from 'storybook/internal/telemetry'; + +import { aiSetup } from './index.ts'; + +beforeEach(() => { + vi.mocked(cache.set).mockClear(); + vi.mocked(snapshotPreviewFile).mockClear(); + vi.mocked(telemetry).mockClear(); +}); + +describe('aiSetup telemetry gating', () => { + it('records ai-setup-pending + 
preview snapshot when telemetry is enabled', async () => { + await aiSetup({ configDir: '/proj/.storybook', disableTelemetry: false }); + + expect(vi.mocked(snapshotPreviewFile)).toHaveBeenCalledTimes(1); + expect(vi.mocked(cache.set)).toHaveBeenCalledWith( + 'ai-setup-pending', + expect.objectContaining({ + configDir: expect.stringContaining('.storybook'), + sessionId: 'session-xyz', + previewPath: '/proj/.storybook/preview.ts', + previewHash: 'abc', + }) + ); + expect(vi.mocked(telemetry)).toHaveBeenCalledWith('ai-setup', expect.any(Object)); + }); + + it('skips snapshot + cache write when telemetry is disabled', async () => { + vi.mocked(isTelemetryModuleEnabled).mockReturnValueOnce(false); + + await aiSetup({ configDir: '/proj/.storybook', disableTelemetry: true }); + + expect(vi.mocked(snapshotPreviewFile)).not.toHaveBeenCalled(); + expect(vi.mocked(cache.set)).not.toHaveBeenCalled(); + }); + + it('treats missing disableTelemetry as enabled (backwards compatible default)', async () => { + await aiSetup({ configDir: '/proj/.storybook' }); + + expect(vi.mocked(snapshotPreviewFile)).toHaveBeenCalledTimes(1); + expect(vi.mocked(cache.set)).toHaveBeenCalledWith('ai-setup-pending', expect.any(Object)); + }); +}); diff --git a/code/lib/cli-storybook/src/ai/index.ts b/code/lib/cli-storybook/src/ai/index.ts index 4107590ffc68..ad08be694cba 100644 --- a/code/lib/cli-storybook/src/ai/index.ts +++ b/code/lib/cli-storybook/src/ai/index.ts @@ -6,6 +6,7 @@ import { cache } from 'storybook/internal/common'; import { logger } from 'storybook/internal/node-logger'; import { getSessionId, + isTelemetryModuleEnabled, snapshotPreviewFile, telemetry, type AiSetupPendingRecord, @@ -79,7 +80,7 @@ export async function aiSetup(options: AiSetupOptions): Promise { return; } - const result = generateMarkdownOutput(projectInfo); + const result = await generateMarkdownOutput(projectInfo); const markdownOutput = result.markdown; await telemetry('ai-setup', { @@ -99,17 +100,21 @@ export 
async function aiSetup(options: AiSetupOptions): Promise { // Snapshot the preview file baseline and cache the pending setup record. // Subsequent CLI entry points (dev, build, doctor, etc.) read this to - // collect evidence of what the agent accomplished. - const resolvedConfigDir = resolve(projectInfo.configDir); - const previewSnapshot = await snapshotPreviewFile(resolvedConfigDir); - const sessionId = await getSessionId(); - const pendingRecord: AiSetupPendingRecord = { - timestamp: Date.now(), - sessionId, - configDir: resolvedConfigDir, - ...previewSnapshot, - }; - await cache.set('ai-setup-pending', pendingRecord); + // collect evidence of what the agent accomplished — but only via telemetry + // (the `ai-setup-evidence` event). Skip the snapshot + cache write when + // telemetry is disabled so there's nobody to read it. + if (isTelemetryModuleEnabled()) { + const resolvedConfigDir = resolve(projectInfo.configDir); + const previewSnapshot = await snapshotPreviewFile(resolvedConfigDir); + const sessionId = await getSessionId(); + const pendingRecord: AiSetupPendingRecord = { + timestamp: Date.now(), + sessionId, + configDir: resolvedConfigDir, + ...previewSnapshot, + }; + await cache.set('ai-setup-pending', pendingRecord); + } if (output) { const outputPath = resolve(output); diff --git a/code/lib/cli-storybook/src/ai/prompt.ts b/code/lib/cli-storybook/src/ai/prompt.ts index 1ad37f97de71..55d9a5a2b52b 100644 --- a/code/lib/cli-storybook/src/ai/prompt.ts +++ b/code/lib/cli-storybook/src/ai/prompt.ts @@ -1,708 +1,7 @@ import { dedent } from 'ts-dedent'; -import type { ProjectInfo, AiPrompt } from './types.ts'; - -/** - * Builds a markdown-format docs URL with renderer and language query parameters. - * Appending .md to any Storybook docs URL returns clean markdown with code examples. - */ -export function getDocsMarkdownUrl( - path: string, - projectInfo?: Pick -): string { - const { majorVersion, renderer = 'react', language = 'ts' } = projectInfo ?? 
{}; - const versionSegment = majorVersion ? `/${majorVersion}` : ''; - const params = new URLSearchParams(); - if (renderer) { - params.set('renderer', renderer); - } - params.set('language', language); - const query = params.toString(); - return `https://storybook.js.org/docs${versionSegment}/${path}.md${query ? `?${query}` : ''}`; -} - -export function getPrompts(projectInfo: ProjectInfo): { - prompts: AiPrompt[]; -} { - const aiPrompts: AiPrompt[] = []; - - aiPrompts.push({ - name: 'setup', - description: 'Set up Storybook for success', - instructions: getSetupInstructions(projectInfo), - }); - - return { prompts: aiPrompts }; -} - -function getTypeImportSource(projectInfo: ProjectInfo): string { - return projectInfo.framework || projectInfo.rendererPackage || '@storybook/react'; -} - -function getSetupInstructions(projectInfo: ProjectInfo): string { - const configDir = projectInfo.configDir; - const typeImport = getTypeImportSource(projectInfo); - - return dedent` - Attention: The following instructions must be followed in order to successfully set up Storybook in this project. Do not skip steps or attempt to do them out of order. - - Your goal is to make Storybook fully functional in this project by analyzing the codebase, - configuring the preview with the right decorators, and writing stories for some components. - - After each created story, run Vitest to verify it renders. - If the test fails, read the error, fix the issue, and re-run until it passes before moving on. - - - Copy real patterns from the codebase - - Keep the app code unchanged - - Put the default setup in \`${configDir}/preview.tsx\` - - Keep app mocking and runtime setup in \`${configDir}/preview.tsx\`, not in the stories - - ${getDocsReferenceSection(projectInfo)} - - ### Step 1: Analyze the codebase - - Read enough of the app to understand the full runtime environment before writing any stories. - - Do not stop at \`main.tsx\` or \`App.tsx\`. 
- Follow imports into providers, pages, hooks, and shared components until you know: - - - which providers exist - - which CSS files are injected - - which queries fetch data - - which browser-state reads happen - - which portals and portal roots exist - - which pages and components show the real usage patterns - - Example of what to copy: - - \`\`\`tsx - // src/main.tsx - import "./index.css"; - import App from "./App"; - import { SessionProvider } from "./contexts/SessionContext"; - - createRoot(document.getElementById("root")!).render( - - - , - ); - \`\`\` - - That means Storybook should copy: - - - the \`index.css\` import - - the \`SessionProvider\` - - the same provider order - - Example of tracing the app deeper: - - \`\`\`tsx - // src/App.tsx - function App() { - const { products, loadMoreProducts } = useProducts(); - const { currentUser, signOut } = useSession(); - // ... - } - \`\`\` - - \`\`\`ts - // src/hooks/useProducts.ts - const response = await fetch(apiBaseUrl + "/products?page=1"); - \`\`\` - - \`\`\`ts - // src/hooks/useTheme.ts - const savedTheme = localStorage.getItem("theme"); - \`\`\` - - That means the default Storybook setup should discover and prepare: - - - provider state - - MSW handlers for queries - - browser-state values that are actually read during render - - ### Step 2: Build one default app environment in preview - - Set up Storybook once so most stories work without story-specific setup. - - Start with the smallest faithful environment: - - - the real provider tree - - the real root CSS - - seeded browser state if the app reads it during render - - MSW for network/data queries - - It is fine to seed browser state such as \`localStorage\`, \`sessionStorage\`, and cookies when the app reads them during render. - Seed only the specific app-owned keys and values you need. - Do not clear all \`localStorage\`, \`sessionStorage\`, or cookies, and do not reset Storybook's own state. - Do not mock or redefine the browser runtime itself. 
- The stories run in Vitest browser mode, so the real browser environment should already exist. - - ${getPreviewConfigExample(projectInfo)} - - Use this same idea for: - - - providers - - root CSS - - browser state - - dates, and if the app logic depends on them during render then always use \`mockdate\` - - Example with the \`mockdate\` package: - - ${getMockDateExample(projectInfo)} - - ### Step 3: Support portals with preview-body.html - - If the app uses portals, copy that setup into Storybook too. - - Look for patterns like: - - - \`createPortal(...)\` - - modal, dialog, drawer, popover, tooltip, toast, or dropdown portal components - - hard-coded roots such as \`#portal-root\`, \`#modal-root\`, \`#drawer-root\`, or \`#toast-root\` - - Example of what to copy: - - \`\`\`tsx - // real component - return createPortal(, document.getElementById("portal-root")!); - \`\`\` - - That means Storybook should create the same portal root in \`${configDir}/preview-body.html\`: - - \`\`\`html - -
- \`\`\` - - If the app uses multiple portal roots, create all of them there: - - \`\`\`html - - -
-
- \`\`\` - - If a library portals directly to \`document.body\`, do not add extra roots for it. - Make sure the copied page shell, CSS, and layout still allow overlays, fixed positioning, and z-index stacking to render correctly. - - ### Step 4: Mock side effects globally - - All network/data queries should be handled by the default Storybook environment. - - - Always use \`msw-storybook-addon\` for query mocking. - - If you introduce MSW, run \`npx msw init ./public --save\` to create the worker file. - - Make sure Storybook serves \`./public\` as a static dir so \`mockServiceWorker.js\` is available. - - Do not mock \`fetch\` directly. - - Network/data queries should return deterministic mock data. - - If you need to change dependencies, first check the lockfile and use that package manager for the change. - - Example of copying a real fetch pattern into shared handlers: - - \`\`\`ts - // real app hook - const response = await fetch( - apiBaseUrl + - "/products?" + - new URLSearchParams({ - page: "1", - sort: "featured", - }), - ); - \`\`\` - - \`\`\`ts - // ${configDir}/msw-handlers.ts - import { http, HttpResponse } from "msw"; - - export const mswHandlers = { - products: [ - http.get("https://api.example.com/products", () => - HttpResponse.json({ - items: [ - { - id: "product-1", - name: "Example product", - description: "Mock product description", - imageUrl: "https://images.example.com/product.jpg", - price: 42, - }, - ], - }), - ), - ], - }; - \`\`\` - - ${getMswPreviewExample(projectInfo)} - - \`\`\`ts - // ${configDir}/main.ts - import type { StorybookConfig } from "${typeImport}"; - - const config: StorybookConfig = { - staticDirs: ["../public"], - }; - - export default config; - \`\`\` - - Keep these mocks global. - Do not put fetch mocks in individual stories. - Only add handlers for requests that the shared preview setup or the stories actually use. - Do not add catch-all handlers that can hide unrelated failures. 
- If the defaults are not enough, improve the shared default setup instead. - Seed browser state when needed, but do not mock \`window\`, \`document\`, \`navigator\`, observers, or similar runtime APIs. - The only exception is \`mockdate\` when date-based rendering exists. - - ### Step 5: Write stories - - Try to find around 10 good candidate components for story files. - Write colocated stories for top-level components, from low-level reusable components up to page components. - Write up to 10 story files, or fewer only if the codebase clearly has fewer meaningful targets. - - The stories should use JSX copied from real usage patterns in: - - - pages - - app shells - - routes - - tests - - existing feature code - - As a rule of thumb, each story file should have around 3 story exports when the component or page has enough meaningful states. - It can have more when the real usage supports it, up to 10 story exports in one file. - - Always show all imports explicitly in story and preview files. - Do not rely on omitted or implied imports in examples or generated code. - - #### Story tags - - Every story meta must include the \`ai-generated\` tag to identify AI-created stories: - - ${getStoryExample(projectInfo)} - - If a story could not be fully fixed after the self-healing loop (the test still fails - or the rendering is incomplete), add the \`needs-work\` tag alongside \`ai-generated\`: - - ${getNeedsWorkTagExample(projectInfo)} - - Keep app mocking and runtime setup in preview, not in the stories. - Do not build large story-specific harnesses. - Do not write story files for subcomponents, hooks, contexts, or helpers. - Do not create new application components. - Do not add a custom \`title\`. - Do not stop after only a few easy targets if the codebase has more meaningful components or pages available. - - ### Step 6: Write a play function for every story - - Every named story export must have a \`play\` function. 
- The \`play\` function is not optional, even for simple stories. - - The purpose of the \`play\` function is to prove that the story actually works in the copied Storybook environment: - - - the story renders something real and non-empty - - the decorators provide the needed context - - the CSS is applied well enough for the intended state to be visible - - the MSW mocks or seeded browser state are actually being used - - important interactions, async loading states, and portals behave correctly - - Use \`play\` functions to verify behavior, not just to click around. - A story without assertions is incomplete. - - Use tools from \`storybook/test\` such as: - - - \`expect\` - - \`waitFor\` - - Prefer \`canvas\` and \`userEvent\` from the \`play\` context. - Do not destructure \`canvasElement\` just to create \`const canvas = within(canvasElement)\`. - Do not import \`userEvent\` from \`storybook/test\`; use \`userEvent\` from the \`play\` context instead. - Only use \`canvasElement.ownerDocument\` when you need to query outside the canvas, such as for portals. 
- - Example: - - \`\`\`tsx - import type { StoryObj } from "${typeImport}"; - - export const FilledForm: Story = { - play: async ({ canvas, userEvent }) => { - const emailInput = canvas.getByLabelText("email", { - selector: "input", - }); - - await userEvent.type(emailInput, "example-email@email.com", { - delay: 100, - }); - - const passwordInput = canvas.getByLabelText("password", { - selector: "input", - }); - - await userEvent.type(passwordInput, "ExamplePassword", { - delay: 100, - }); - - const submitButton = canvas.getByRole("button"); - await userEvent.click(submitButton); - }, - }; - \`\`\` - - The assertions should match the real pattern you copied: - - - for provider-backed stories, assert the provider-dependent UI appears correctly - - for mocked-data stories, wait for the mocked data to appear and assert on it - - for CSS-sensitive states, assert on visibility, text layout, class-driven states, or meaningful computed styles - - for routing or navigation stories, assert the routed state or navigation outcome - - for portal stories, query from \`canvasElement.ownerDocument\` when the UI renders outside the canvas - - Examples of useful checks: - - - a themed button has the expected label and is visibly enabled or disabled - - a modal opened through a decorator or provider is visible in the portal root - - mocked API data appears in the page instead of a loading spinner forever - - a selected tab actually shows the selected panel - - a toast, alert, or badge has the expected accessible text and visual state - - a CSS class or computed style confirms the real state that matters - - ### Step 7: Cover the patterns you found - - Write stories for the real patterns in the codebase, for example: - - - a low-level reusable component in real JSX usage - - a provider-backed component - - a browser-state-backed component - - a fetched-data component - - a real page component - - Use \`App.tsx\` to inspect the real provider tree and usage patterns, but do not make a 
story for \`App\` when the codebase has actual page components. - - Example page story: - - ${getPageStoryExample(projectInfo)} - - ### Step 8: Verify both rendering and types - - As you work, verify the stories with Vitest: - - \`\`\`bash - npx vitest --project storybook - \`\`\` - - Also verify types so you catch missing required props, broken imports, and preview typing issues. Run the same TypeScript command the project itself uses. - - \`\`\`bash - - \`\`\` - - After verification passes, review every changed file and remove anything that is not needed for the final solution, especially debug fixes, overly broad mocks, unnecessary dependencies, and eval artifacts. - - Keep iterating until: - - - every story you wrote passes - - every story you wrote has a meaningful passing \`play\` function - - the changed stories and preview setup pass the project's real TypeScript check - - the rendered output looks sensible - - the default global mocked environment is strong enough that stories do not need manual fetch overrides - - stories no longer fail because the shared preview setup and story JSX are fixed - - all passing stories have \`tags: ['ai-generated']\` in their meta - - any stories that still need work have \`tags: ['ai-generated', 'needs-work']\` in their meta - `; -} - -function getDocsReferenceSection(projectInfo: ProjectInfo): string { - const docsUrl = (path: string) => getDocsMarkdownUrl(path, projectInfo); - - return dedent` - ### Storybook Documentation Reference - - Use the following references to look up Storybook APIs, concepts, or examples: - - - Full docs index: https://storybook.js.org/llms.txt - - See code snippets only with codeOnly=true param e.g. 
${docsUrl('writing-stories')}&codeOnly=true - - Key documentation pages for this task: - - Writing stories: ${docsUrl('writing-stories')} - - Decorators: ${docsUrl('writing-stories/decorators')} - - Args: ${docsUrl('writing-stories/args')} - - Play functions: ${docsUrl('writing-stories/play-function')} - - Vitest integration: ${docsUrl('writing-tests/vitest-plugin')} - - Fetch these URLs directly when you need guidance on Storybook APIs or patterns. - `; -} - -function getPreviewConfigExample(projectInfo: ProjectInfo): string { - const configDir = projectInfo.configDir; - const typeImport = getTypeImportSource(projectInfo); - - if (projectInfo.hasCsfFactoryPreview) { - return dedent` - \`\`\`tsx - // ${configDir}/preview.tsx - import '../src/index.css'; // import global styles - import MockDate from 'mockdate'; - - import { definePreview } from 'storybook/preview'; - import { SessionProvider } from '../src/contexts/SessionContext'; - - export default definePreview({ - decorators: [ - (Story) => ( - - - - ), - ], - async beforeEach() { - localStorage.setItem('theme', 'dark'); - localStorage.setItem('sidebar:open', 'true'); - MockDate.set('2024-04-01T12:00:00Z'); - }, - }); - \`\`\` - `; - } - - return dedent` - \`\`\`tsx - // ${configDir}/preview.tsx - import type { Preview } from '${typeImport}'; - import MockDate from 'mockdate'; - import '../src/index.css'; // import global styles - import { SessionProvider } from '../src/contexts/SessionContext'; - - const preview: Preview = { - decorators: [ - (Story) => ( - - - - ), - ], - async beforeEach() { - localStorage.setItem('theme', 'dark'); - localStorage.setItem('sidebar:open', 'true'); - MockDate.set('2024-04-01T12:00:00Z'); - }, - }; - - export default preview; - \`\`\` - `; -} - -function getMockDateExample(projectInfo: ProjectInfo): string { - const typeImport = getTypeImportSource(projectInfo); - - if (projectInfo.hasCsfFactoryPreview) { - return dedent` - \`\`\`tsx - import MockDate from 'mockdate'; - import { 
definePreview } from 'storybook/preview'; - - export default definePreview({ - async beforeEach() { - MockDate.set('2024-04-01T12:00:00Z'); - }, - }); - \`\`\` - `; - } - - return dedent` - \`\`\`tsx - import type { Preview } from '${typeImport}'; - import MockDate from 'mockdate'; - - const preview: Preview = { - async beforeEach() { - MockDate.set('2024-04-01T12:00:00Z'); - }, - }; - - export default preview; - \`\`\` - `; -} - -function getMswPreviewExample(projectInfo: ProjectInfo): string { - const configDir = projectInfo.configDir; - const typeImport = getTypeImportSource(projectInfo); - - if (projectInfo.hasCsfFactoryPreview) { - return dedent` - \`\`\`tsx - // ${configDir}/preview.tsx - import { definePreview } from 'storybook/preview'; - import { initialize, mswLoader } from 'msw-storybook-addon'; - import { mswHandlers } from './msw-handlers'; - - initialize({ - onUnhandledRequest: 'bypass', - }); - - export default definePreview({ - loaders: [mswLoader], - parameters: { - msw: { - handlers: mswHandlers, - }, - }, - }); - \`\`\` - `; - } - - return dedent` - \`\`\`tsx - // ${configDir}/preview.tsx - import type { Preview } from '${typeImport}'; - import { initialize, mswLoader } from 'msw-storybook-addon'; - import { mswHandlers } from './msw-handlers'; - - initialize({ - onUnhandledRequest: 'bypass', - }); - - const preview: Preview = { - loaders: [mswLoader], - parameters: { - msw: { - handlers: mswHandlers, - }, - }, - }; - - export default preview; - \`\`\` - `; -} - -function getStoryExample(projectInfo: ProjectInfo): string { - if (projectInfo.hasCsfFactoryPreview) { - return dedent` - \`\`\`tsx - import preview from '#.storybook/preview'; - import { expect } from 'storybook/test'; - import { SomeComponent } from './SomeComponent'; - - const meta = preview.meta({ - component: SomeComponent, - tags: ['ai-generated'], - }); - - export const Default = meta.story({ - render: () => , - play: async ({ canvas }) => { - await 
expect(canvas.getByRole('button')).toBeVisible(); - }, - }); - \`\`\` - `; - } - - const typeImport = getTypeImportSource(projectInfo); - - return dedent` - \`\`\`tsx - import type { Meta, StoryObj } from '${typeImport}'; - import { expect } from 'storybook/test'; - import { SomeComponent } from './SomeComponent'; - - const meta = { - component: SomeComponent, - tags: ['ai-generated'], - } satisfies Meta; - - export default meta; - type Story = StoryObj; - - export const Default: Story = { - render: () => , - play: async ({ canvas }) => { - await expect(canvas.getByRole('button')).toBeVisible(); - }, - }; - \`\`\` - `; -} - -function getNeedsWorkTagExample(projectInfo: ProjectInfo): string { - if (projectInfo.hasCsfFactoryPreview) { - return dedent` - \`\`\`ts - const meta = preview.meta({ - component: SomeComponent, - tags: ['ai-generated', 'needs-work'], - }); - \`\`\` - `; - } - - return dedent` - \`\`\`ts - const meta = { - component: SomeComponent, - tags: ['ai-generated', 'needs-work'], - } satisfies Meta; - \`\`\` - `; -} - -function getPageStoryExample(projectInfo: ProjectInfo): string { - if (projectInfo.hasCsfFactoryPreview) { - return dedent` - \`\`\`tsx - import preview from '#.storybook/preview'; - import { expect } from 'storybook/test'; - import { ProductPage } from './ProductPage'; - - const meta = preview.meta({ - component: ProductPage, - tags: ['ai-generated'], - }); - - export const Default = meta.story({ - render: () => , - play: async ({ canvas }) => { - await expect( - canvas.getByRole('heading', { name: /products/i }), - ).toBeVisible(); - }, - }); - \`\`\` - `; - } - - const typeImport = getTypeImportSource(projectInfo); - - return dedent` - \`\`\`tsx - import type { Meta, StoryObj } from '${typeImport}'; - import { expect } from 'storybook/test'; - import { ProductPage } from './ProductPage'; - - const meta = { - component: ProductPage, - tags: ['ai-generated'], - } satisfies Meta; - - export default meta; - type Story = StoryObj; - - 
export const Default: Story = { - render: () => , - play: async ({ canvas }) => { - await expect( - canvas.getByRole('heading', { name: /products/i }), - ).toBeVisible(); - }, - }; - \`\`\` - `; -} +import type { ProjectInfo } from './types.ts'; +import { getPrompts } from './setup-prompts/index.ts'; function getProjectOverview(projectInfo: ProjectInfo): string { return dedent` @@ -720,10 +19,10 @@ function getProjectOverview(projectInfo: ProjectInfo): string { `; } -export function generateMarkdownOutput(projectInfo: ProjectInfo): { +export async function generateMarkdownOutput(projectInfo: ProjectInfo): Promise<{ markdown: string; -} { - const { prompts: aiPrompts } = getPrompts(projectInfo); +}> { + const { prompts: aiPrompts } = await getPrompts(projectInfo); const sections: string[] = []; diff --git a/code/lib/cli-storybook/src/ai/setup-prompts/index.ts b/code/lib/cli-storybook/src/ai/setup-prompts/index.ts new file mode 100644 index 000000000000..ab8d1a7c3905 --- /dev/null +++ b/code/lib/cli-storybook/src/ai/setup-prompts/index.ts @@ -0,0 +1,68 @@ +import type { AiPrompt, ProjectInfo } from '../types.ts'; + +import * as patternCopyPlay from './pattern-copy-play.ts'; + +/** + * Main prompt used currently in `npx storybook ai setup` command. If you promote a new prompt to be default, move this to the FORMERLY_USED_PROMPTS object below. + */ +const CURRENTLY_USED_PROMPT: Record string> = { + 'pattern-copy-play': patternCopyPlay.instructions, +}; + +/** + * Names of variants registered behind `EVAL_SETUP_PROMPT`. Loaded on demand + * from sibling files so the bundler can code-split them away from the + * default-only path that real users hit. + */ +const FORMERLY_USED_PROMPTS: Record Promise<(projectInfo: ProjectInfo) => string>> = { + setup: async () => (await import('./setup.ts')).instructions, +}; + +export type PromptName = string; + +/** Names available to the eval harness — defaults plus experimental variants. 
*/ +export const PROMPT_NAMES: PromptName[] = [ + ...Object.keys(CURRENTLY_USED_PROMPT), + ...Object.keys(FORMERLY_USED_PROMPTS), +]; + +/** + * The single prompt variant that ships to real users. Running + * `npx storybook ai setup` without any overrides always produces this prompt. + * + * This is a standalone string literal, not derived from `CURRENTLY_USED_PROMPT`. + * Keep it equal to a key registered there: it is the fallback returned by + * `resolvePromptName`, and `getPrompts` looks the name up without a guard. + */ +export const DEFAULT_PROMPT_NAME: PromptName = 'pattern-copy-play'; + +/** + * Name of the internal env var read by `resolvePromptName`, which `getPrompts` + * calls. The eval harness sets this + * before spawning `ai setup` to select a non-default prompt variant for A/B + * comparison. Unknown values fall back to the default so a typo never breaks + * the CLI for real users. + */ +const EVAL_SETUP_PROMPT_ENV = 'EVAL_SETUP_PROMPT'; +/** + * Chooses the prompt variant for this run. + * + * Reads `process.env.EVAL_SETUP_PROMPT` and trims it. The value is honored only + * when it is an own key of `CURRENTLY_USED_PROMPT` or `FORMERLY_USED_PROMPTS`; + * an unset, empty, or unrecognized value yields `DEFAULT_PROMPT_NAME`. + * + * @returns The name of the prompt variant to build. + */ +function resolvePromptName(): PromptName { + const requested = process.env[EVAL_SETUP_PROMPT_ENV]?.trim(); + if ( + requested && + (Object.hasOwn(CURRENTLY_USED_PROMPT, requested) || + Object.hasOwn(FORMERLY_USED_PROMPTS, requested)) + ) { + return requested; + } + return DEFAULT_PROMPT_NAME; +} +/** + * Builds the prompt list for the resolved variant. + * + * The builder is taken from `CURRENTLY_USED_PROMPT` when the name is registered + * there; otherwise the lazy loader in `FORMERLY_USED_PROMPTS` is awaited to + * obtain it. That fallback lookup is unguarded, so it relies on + * `resolvePromptName` only ever returning a registered name. + * + * @param projectInfo - Passed unchanged to the selected prompt builder. + * @returns An object whose `prompts` array has exactly one entry: the resolved + * name, a fixed description, and the builder output as `instructions`. + */ +export async function getPrompts(projectInfo: ProjectInfo): Promise<{ prompts: AiPrompt[] }> { + const name = resolvePromptName(); + const builder = CURRENTLY_USED_PROMPT[name] ?? (await FORMERLY_USED_PROMPTS[name]()); + + return { + prompts: [ + { + name, + description: 'Set up Storybook for success', + instructions: builder(projectInfo), + }, + ], + }; +} diff --git a/code/lib/cli-storybook/src/ai/setup-prompts/pattern-copy-play.ts b/code/lib/cli-storybook/src/ai/setup-prompts/pattern-copy-play.ts new file mode 100644 index 000000000000..a8abe0493f15 --- /dev/null +++ b/code/lib/cli-storybook/src/ai/setup-prompts/pattern-copy-play.ts @@ -0,0 +1,870 @@ +/** + * Prompt variant: `pattern-copy-play` (current default for `npx storybook ai setup`) + * + * - Created: 2026-04-22 (eval iteration 2, default since this PR) + * - Status: shipping default — produced by every `ai setup` invocation + * without `EVAL_SETUP_PROMPT` set.
+ * - Reference eval results: + * https://github.com/search?q=is:pr label:"prompt:pattern-copy-play" org:storybook-tmp&type=pullrequests + * + * Update this header when iterating: bump the iteration number and link the + * latest eval run so reviewers can compare variants without spelunking git. + */ +import { dedent } from 'ts-dedent'; + +import type { ProjectInfo } from '../types.ts'; + +/** + * Builds a markdown-format docs URL with renderer and language query parameters. + * Appending .md to any Storybook docs URL returns clean markdown with code examples. + * + * @param path - Docs path placed between the (optionally versioned) docs root and + * the `.md` suffix, e.g. `writing-stories/args`. + * @param projectInfo - Optional source of `majorVersion`, `renderer` and `language`. + * When it is omitted, or a field is `undefined`, `renderer` defaults to `'react'` + * and `language` to `'ts'`. + * @returns The docs URL. The version segment is left out when `majorVersion` is + * falsy, and the `renderer` param is left out when `renderer` is falsy (for + * example an empty string). `language` is always set, so the query string is + * never empty. + */ +function getDocsMarkdownUrl( + path: string, + projectInfo?: Pick +): string { + const { majorVersion, renderer = 'react', language = 'ts' } = projectInfo ?? {}; + const versionSegment = majorVersion ? `/${majorVersion}` : ''; + const params = new URLSearchParams(); + if (renderer) { + params.set('renderer', renderer); + } + params.set('language', language); + const query = params.toString(); + return `https://storybook.js.org/docs${versionSegment}/${path}.md${query ? `?${query}` : ''}`; +} +/** + * Returns the package that the generated examples import Storybook types from: + * `projectInfo.framework`, else `projectInfo.rendererPackage`, else + * `'@storybook/react'`. `||` is used, so empty strings fall through as well. + */ +function getTypeImportSource(projectInfo: ProjectInfo): string { + return projectInfo.framework || projectInfo.rendererPackage || '@storybook/react'; +} +/** + * Builds the "Storybook Documentation Reference" markdown section of the prompt. + * The local `docsUrl` helper binds `projectInfo` to `getDocsMarkdownUrl`. + */ +function getDocsReferenceSection(projectInfo: ProjectInfo): string { + const docsUrl = (path: string) => getDocsMarkdownUrl(path, projectInfo); + + return dedent` + ### Storybook Documentation Reference + + Use the following references to look up Storybook APIs, concepts, or examples: + + - Full docs index: https://storybook.js.org/llms.txt + - See code snippets only with codeOnly=true param e.g.
${docsUrl('writing-stories')}&codeOnly=true + + Key documentation pages for this task: + - Writing stories: ${docsUrl('writing-stories')} + - Decorators: ${docsUrl('writing-stories/decorators')} + - Args: ${docsUrl('writing-stories/args')} + - Play functions: ${docsUrl('writing-stories/play-function')} + - Vitest integration: ${docsUrl('writing-tests/vitest-plugin')} + + Fetch these URLs directly when you need guidance on Storybook APIs or patterns. + `; +} + +function getPreviewConfigExample(projectInfo: ProjectInfo): string { + const configDir = projectInfo.configDir; + const typeImport = getTypeImportSource(projectInfo); + + if (projectInfo.hasCsfFactoryPreview) { + return dedent` + \`\`\`tsx + // ${configDir}/preview.tsx + import '../src/index.css'; // import global styles + import MockDate from 'mockdate'; + + import { definePreview } from 'storybook/preview'; + import { SessionProvider } from '../src/contexts/SessionContext'; + + export default definePreview({ + decorators: [ + (Story) => ( + + + + ), + ], + async beforeEach() { + localStorage.setItem('theme', 'dark'); + localStorage.setItem('sidebar:open', 'true'); + MockDate.set('2024-04-01T12:00:00Z'); + }, + }); + \`\`\` + `; + } + + return dedent` + \`\`\`tsx + // ${configDir}/preview.tsx + import type { Preview } from '${typeImport}'; + import MockDate from 'mockdate'; + import '../src/index.css'; // import global styles + import { SessionProvider } from '../src/contexts/SessionContext'; + + const preview: Preview = { + decorators: [ + (Story) => ( + + + + ), + ], + async beforeEach() { + localStorage.setItem('theme', 'dark'); + localStorage.setItem('sidebar:open', 'true'); + MockDate.set('2024-04-01T12:00:00Z'); + }, + }; + + export default preview; + \`\`\` + `; +} + +function getMockDateExample(projectInfo: ProjectInfo): string { + const typeImport = getTypeImportSource(projectInfo); + + if (projectInfo.hasCsfFactoryPreview) { + return dedent` + \`\`\`tsx + import MockDate from 'mockdate'; + import { 
definePreview } from 'storybook/preview'; + + export default definePreview({ + async beforeEach() { + MockDate.set('2024-04-01T12:00:00Z'); + }, + }); + \`\`\` + `; + } + + return dedent` + \`\`\`tsx + import type { Preview } from '${typeImport}'; + import MockDate from 'mockdate'; + + const preview: Preview = { + async beforeEach() { + MockDate.set('2024-04-01T12:00:00Z'); + }, + }; + + export default preview; + \`\`\` + `; +} + +function getMswPreviewExample(projectInfo: ProjectInfo): string { + const configDir = projectInfo.configDir; + const typeImport = getTypeImportSource(projectInfo); + + if (projectInfo.hasCsfFactoryPreview) { + return dedent` + \`\`\`tsx + // ${configDir}/preview.tsx + import { definePreview } from 'storybook/preview'; + import { initialize, mswLoader } from 'msw-storybook-addon'; + import { mswHandlers } from './msw-handlers'; + + initialize({ + onUnhandledRequest: 'bypass', + }); + + export default definePreview({ + loaders: [mswLoader], + parameters: { + msw: { + handlers: mswHandlers, + }, + }, + }); + \`\`\` + `; + } + + return dedent` + \`\`\`tsx + // ${configDir}/preview.tsx + import type { Preview } from '${typeImport}'; + import { initialize, mswLoader } from 'msw-storybook-addon'; + import { mswHandlers } from './msw-handlers'; + + initialize({ + onUnhandledRequest: 'bypass', + }); + + const preview: Preview = { + loaders: [mswLoader], + parameters: { + msw: { + handlers: mswHandlers, + }, + }, + }; + + export default preview; + \`\`\` + `; +} + +function getStoryExample(projectInfo: ProjectInfo): string { + if (projectInfo.hasCsfFactoryPreview) { + return dedent` + \`\`\`tsx + import preview from '#.storybook/preview'; + import { expect } from 'storybook/test'; + import { SomeComponent } from './SomeComponent'; + + const meta = preview.meta({ + component: SomeComponent, + tags: ['ai-generated'], + }); + + export const Default = meta.story({ + render: () => , + play: async ({ canvas }) => { + await 
expect(canvas.getByRole('button')).toBeVisible(); + }, + }); + \`\`\` + `; + } + + const typeImport = getTypeImportSource(projectInfo); + + return dedent` + \`\`\`tsx + import type { Meta, StoryObj } from '${typeImport}'; + import { expect } from 'storybook/test'; + import { SomeComponent } from './SomeComponent'; + + const meta = { + component: SomeComponent, + tags: ['ai-generated'], + } satisfies Meta; + + export default meta; + type Story = StoryObj; + + export const Default: Story = { + render: () => , + play: async ({ canvas }) => { + await expect(canvas.getByRole('button')).toBeVisible(); + }, + }; + \`\`\` + `; +} + +function getNeedsWorkTagExample(projectInfo: ProjectInfo): string { + if (projectInfo.hasCsfFactoryPreview) { + return dedent` + \`\`\`ts + const meta = preview.meta({ + component: SomeComponent, + tags: ['ai-generated', 'needs-work'], + }); + \`\`\` + `; + } + + return dedent` + \`\`\`ts + const meta = { + component: SomeComponent, + tags: ['ai-generated', 'needs-work'], + } satisfies Meta; + \`\`\` + `; +} + +function getArgsStoryExample(projectInfo: ProjectInfo): string { + if (projectInfo.hasCsfFactoryPreview) { + return dedent` + \`\`\`tsx + import preview from '#.storybook/preview'; + import { expect } from 'storybook/test'; + import { Button } from './Button'; + + const meta = preview.meta({ + component: Button, + tags: ['ai-generated'], + }); + + export const Primary = meta.story({ + args: { + variant: 'primary', + children: 'Save', + }, + play: async ({ canvas }) => { + await expect(canvas.getByRole('button', { name: /save/i })).toBeVisible(); + }, + }); + + export const Disabled = meta.story({ + args: { + variant: 'primary', + disabled: true, + children: 'Save', + }, + play: async ({ canvas }) => { + await expect(canvas.getByRole('button')).toBeDisabled(); + }, + }); + \`\`\` + `; + } + + const typeImport = getTypeImportSource(projectInfo); + + return dedent` + \`\`\`tsx + import type { Meta, StoryObj } from '${typeImport}'; + import 
{ expect } from 'storybook/test'; + import { Button } from './Button'; + + const meta = { + component: Button, + tags: ['ai-generated'], + } satisfies Meta; + + export default meta; + type Story = StoryObj; + + export const Primary: Story = { + args: { + variant: 'primary', + children: 'Save', + }, + play: async ({ canvas }) => { + await expect(canvas.getByRole('button', { name: /save/i })).toBeVisible(); + }, + }; + + export const Disabled: Story = { + args: { + variant: 'primary', + disabled: true, + children: 'Save', + }, + play: async ({ canvas }) => { + await expect(canvas.getByRole('button')).toBeDisabled(); + }, + }; + \`\`\` + `; +} + +function getRenderCompositionExample(projectInfo: ProjectInfo): string { + if (projectInfo.hasCsfFactoryPreview) { + return dedent` + \`\`\`tsx + import preview from '#.storybook/preview'; + import { expect } from 'storybook/test'; + import { Button } from './Button'; + import { Card } from './Card'; + + const meta = preview.meta({ + component: Button, + tags: ['ai-generated'], + }); + + export const InsideCard = meta.story({ + render: () => ( + + + + ), + play: async ({ canvas, userEvent }) => { + await expect(canvas.getByRole('button', { name: /save/i })).toBeVisible(); + await userEvent.click(canvas.getByRole('button', { name: /save/i })); + }, + }); + \`\`\` + `; + } + + const typeImport = getTypeImportSource(projectInfo); + + return dedent` + \`\`\`tsx + import type { Meta, StoryObj } from '${typeImport}'; + import { expect } from 'storybook/test'; + import { Button } from './Button'; + import { Card } from './Card'; + + const meta = { + component: Button, + tags: ['ai-generated'], + } satisfies Meta; + + export default meta; + type Story = StoryObj; + + export const InsideCard: Story = { + render: () => ( + + + + ), + play: async ({ canvas, userEvent }) => { + await expect(canvas.getByRole('button', { name: /save/i })).toBeVisible(); + await userEvent.click(canvas.getByRole('button', { name: /save/i })); + }, + }; + 
\`\`\` + `; +} + +function getPageStoryExample(projectInfo: ProjectInfo): string { + if (projectInfo.hasCsfFactoryPreview) { + return dedent` + \`\`\`tsx + import preview from '#.storybook/preview'; + import { expect } from 'storybook/test'; + import { ProductPage } from './ProductPage'; + + const meta = preview.meta({ + component: ProductPage, + tags: ['ai-generated'], + }); + + export const Default = meta.story({ + render: () => , + play: async ({ canvas }) => { + await expect( + canvas.getByRole('heading', { name: /products/i }), + ).toBeVisible(); + }, + }); + \`\`\` + `; + } + + const typeImport = getTypeImportSource(projectInfo); + + return dedent` + \`\`\`tsx + import type { Meta, StoryObj } from '${typeImport}'; + import { expect } from 'storybook/test'; + import { ProductPage } from './ProductPage'; + + const meta = { + component: ProductPage, + tags: ['ai-generated'], + } satisfies Meta; + + export default meta; + type Story = StoryObj; + + export const Default: Story = { + render: () => , + play: async ({ canvas }) => { + await expect( + canvas.getByRole('heading', { name: /products/i }), + ).toBeVisible(); + }, + }; + \`\`\` + `; +} + +export function instructions(projectInfo: ProjectInfo): string { + const configDir = projectInfo.configDir; + const typeImport = getTypeImportSource(projectInfo); + + return dedent` + Attention: The following instructions must be followed in order to successfully set up Storybook in this project. Do not skip steps or attempt to do them out of order. + + Your goal is to make Storybook fully functional in this project by analyzing the codebase, + configuring the preview with the right decorators, and writing stories for some components. + + The end state should be a Storybook where any component — from a small button to a full page — can be added without story-specific workarounds. 
All necessary providers, CSS, browser state, and network mocks should live in the shared preview so that just rendering the component in the story is enough. + + After each created story, run Vitest to verify it renders. + If the test fails, read the error, fix the issue, and re-run until it passes before moving on. + + - Copy real patterns from the codebase + - Keep the app code unchanged + - Put the default setup in \`${configDir}/preview.tsx\` + - Keep app mocking and runtime setup in \`${configDir}/preview.tsx\`, not in the stories + + ${getDocsReferenceSection(projectInfo)} + + ### Step 1: Analyze the codebase + + Read enough of the app to understand the full runtime environment before writing any stories. + + Do not stop at \`main.tsx\` or \`App.tsx\`. + Follow imports into providers, pages, hooks, and shared components until you know: + + - which providers exist + - which CSS files are injected + - which queries fetch data + - which browser-state reads happen + - which portals and portal roots exist + - which pages and components show the real usage patterns + + Example of what to copy: + + \`\`\`tsx + // src/main.tsx + import "./index.css"; + import App from "./App"; + import { SessionProvider } from "./contexts/SessionContext"; + + createRoot(document.getElementById("root")!).render( + + + , + ); + \`\`\` + + That means Storybook should copy: + + - the \`index.css\` import + - the \`SessionProvider\` + - the same provider order + + Example of tracing the app deeper: + + \`\`\`tsx + // src/App.tsx + function App() { + const { products, loadMoreProducts } = useProducts(); + const { currentUser, signOut } = useSession(); + // ... 
+ } + \`\`\` + + \`\`\`ts + // src/hooks/useProducts.ts + const response = await fetch(apiBaseUrl + "/products?page=1"); + \`\`\` + + \`\`\`ts + // src/hooks/useTheme.ts + const savedTheme = localStorage.getItem("theme"); + \`\`\` + + That means the default Storybook setup should discover and prepare: + + - provider state + - MSW handlers for queries + - browser-state values that are actually read during render + + ### Step 2: Build one default app environment in preview + + Set up Storybook once so most stories work without story-specific setup. + + Start with the smallest faithful environment: + + - the real provider tree + - the real root CSS + - seeded browser state if the app reads it during render + - MSW for network/data queries + + It is fine to seed browser state such as \`localStorage\`, \`sessionStorage\`, and cookies when the app reads them during render. + Seed only the specific app-owned keys and values you need. + Do not clear all \`localStorage\`, \`sessionStorage\`, or cookies, and do not reset Storybook's own state. + Do not mock or redefine the browser runtime itself. + The stories run in Vitest browser mode, so the real browser environment should already exist. + + ${getPreviewConfigExample(projectInfo)} + + Use this same idea for: + + - providers + - root CSS + - browser state + - dates, and if the app logic depends on them during render then always use \`mockdate\` + + Example with the \`mockdate\` package: + + ${getMockDateExample(projectInfo)} + + ### Step 3: Support portals with preview-body.html + + If the app uses portals, copy that setup into Storybook too. 
+ + Look for patterns like: + + - \`createPortal(...)\` + - modal, dialog, drawer, popover, tooltip, toast, or dropdown portal components + - hard-coded roots such as \`#portal-root\`, \`#modal-root\`, \`#drawer-root\`, or \`#toast-root\` + + Example of what to copy: + + \`\`\`tsx + // real component + return createPortal(, document.getElementById("portal-root")!); + \`\`\` + + That means Storybook should create the same portal root in \`${configDir}/preview-body.html\`: + + \`\`\`html + +
+ \`\`\` + + If the app uses multiple portal roots, create all of them there: + + \`\`\`html + + +
+
+ \`\`\` + + If a library portals directly to \`document.body\`, do not add extra roots for it. + Make sure the copied page shell, CSS, and layout still allow overlays, fixed positioning, and z-index stacking to render correctly. + + ### Step 4: Mock side effects globally + + All network/data queries should be handled by the default Storybook environment. + + - Always use \`msw-storybook-addon\` for query mocking. + - If you introduce MSW, run \`npx msw init ./public --save\` to create the worker file. + - Make sure Storybook serves \`./public\` as a static dir so \`mockServiceWorker.js\` is available. + - Do not mock \`fetch\` directly. + - Network/data queries should return deterministic mock data. + - If you need to change dependencies, first check the lockfile and use that package manager for the change. + + Example of copying a real fetch pattern into shared handlers: + + \`\`\`ts + // real app hook + const response = await fetch( + apiBaseUrl + + "/products?" + + new URLSearchParams({ + page: "1", + sort: "featured", + }), + ); + \`\`\` + + \`\`\`ts + // ${configDir}/msw-handlers.ts + import { http, HttpResponse } from "msw"; + + export const mswHandlers = { + products: [ + http.get("https://api.example.com/products", () => + HttpResponse.json({ + items: [ + { + id: "product-1", + name: "Example product", + description: "Mock product description", + imageUrl: "https://images.example.com/product.jpg", + price: 42, + }, + ], + }), + ), + ], + }; + \`\`\` + + ${getMswPreviewExample(projectInfo)} + + \`\`\`ts + // ${configDir}/main.ts + import type { StorybookConfig } from "${typeImport}"; + + const config: StorybookConfig = { + staticDirs: ["../public"], + }; + + export default config; + \`\`\` + + Keep these mocks global. + Do not put fetch mocks in individual stories. + Only add handlers for requests that the shared preview setup or the stories actually use. + Do not add catch-all handlers that can hide unrelated failures. 
+ If the defaults are not enough, improve the shared default setup instead. + Seed browser state when needed, but do not mock \`window\`, \`document\`, \`navigator\`, observers, or similar runtime APIs. + The only exception is \`mockdate\` when date-based rendering exists. + + ### Step 5: Write stories + + Try to find around 10 good candidate components for story files. + Write colocated stories for top-level components, from low-level reusable components up to page components. + Write up to 10 story files, or fewer only if the codebase clearly has fewer meaningful targets. + + The stories should use JSX copied from real usage patterns in: + + - pages + - app shells + - routes + - tests + - existing feature code + + As a rule of thumb, each story file should have around 3 story exports when the component or page has enough meaningful states. + It can have more when the real usage supports it, up to 10 story exports in one file. + + Always show all imports explicitly in story and preview files. + Do not rely on omitted or implied imports in examples or generated code. + + #### Story tags + + Every story meta must include the \`ai-generated\` tag to identify AI-created stories: + + ${getStoryExample(projectInfo)} + + If a story could not be fully fixed after the self-healing loop (the test still fails + or the rendering is incomplete), add the \`needs-work\` tag alongside \`ai-generated\`: + + ${getNeedsWorkTagExample(projectInfo)} + + #### Args vs render + + For simple components where props drive the state, prefer \`args\` stories — no \`render\` function needed: + + ${getArgsStoryExample(projectInfo)} + + Use \`render\` when the story needs composition — wrapping the component in layout, combining multiple components, or passing children as JSX: + + ${getRenderCompositionExample(projectInfo)} + + Keep app mocking and runtime setup in preview, not in the stories. + Do not build large story-specific harnesses. 
+ Do not write story files for subcomponents, hooks, contexts, or helpers. + Do not create new application components. + Do not add a custom \`title\`. + Do not stop after only a few easy targets if the codebase has more meaningful components or pages available. + + ### Step 6: Write a play function for every story + + Every named story export must have a \`play\` function. + The \`play\` function is not optional, even for simple stories. + + The purpose of the \`play\` function is to prove that the story actually works in the copied Storybook environment: + + - the story renders something real and non-empty + - the decorators provide the needed context + - the CSS is applied well enough for the intended state to be visible + - the MSW mocks or seeded browser state are actually being used + - important interactions, async loading states, and portals behave correctly + + Use \`play\` functions to verify behavior, not just to click around. + A story without assertions is incomplete. + + Use tools from \`storybook/test\` such as: + + - \`expect\` + - \`waitFor\` + + Prefer \`canvas\` and \`userEvent\` from the \`play\` context. + Do not destructure \`canvasElement\` just to create \`const canvas = within(canvasElement)\`. + Do not import \`userEvent\` from \`storybook/test\`; use \`userEvent\` from the \`play\` context instead. + Only use \`canvasElement.ownerDocument\` when you need to query outside the canvas, such as for portals. 
+ + Example: + + \`\`\`tsx + import type { StoryObj } from "${typeImport}"; + + export const FilledForm: Story = { + play: async ({ canvas, userEvent }) => { + const emailInput = canvas.getByLabelText("email", { + selector: "input", + }); + + await userEvent.type(emailInput, "example-email@email.com", { + delay: 100, + }); + + const passwordInput = canvas.getByLabelText("password", { + selector: "input", + }); + + await userEvent.type(passwordInput, "ExamplePassword", { + delay: 100, + }); + + const submitButton = canvas.getByRole("button"); + await userEvent.click(submitButton); + }, + }; + \`\`\` + + The assertions should match the real pattern you copied: + + - for provider-backed stories, assert the provider-dependent UI appears correctly + - for mocked-data stories, wait for the mocked data to appear and assert on it + - for CSS-sensitive states, assert on visibility, text layout, class-driven states, or meaningful computed styles + - for routing or navigation stories, assert the routed state or navigation outcome + - for portal stories, query from \`canvasElement.ownerDocument\` when the UI renders outside the canvas + + Examples of useful checks: + + - a themed button has the expected label and is visibly enabled or disabled + - a modal opened through a decorator or provider is visible in the portal root + - mocked API data appears in the page instead of a loading spinner forever + - a selected tab actually shows the selected panel + - a toast, alert, or badge has the expected accessible text and visual state + - a CSS class or computed style confirms the real state that matters + + ### Step 7: Prove CSS is loaded in exactly one story named \`CssCheck\` + + In exactly one story, named \`CssCheck\`, assert a component-specific computed style. \`toBeVisible\` passes on an unstyled component; a concrete style value proves the shared preview loaded the app's CSS. 
+ + Pick a visually distinctive component, read a styling value from its source, and assert it with \`getComputedStyle\`: + + \`\`\`tsx + export const CssCheck: Story = { + args: { children: "Submit" }, + play: async ({ canvas }) => { + const button = canvas.getByRole("button", { name: /submit/i }); + // PrimaryButton uses bg-blue-600 — fails if Tailwind / global CSS did not load. + await expect(getComputedStyle(button).backgroundColor).toBe("rgb(37, 99, 235)"); + }, + }; + \`\`\` + + ### Step 8: Cover the patterns you found + + Write stories for the real patterns in the codebase, for example: + + - a low-level reusable component in real JSX usage + - a provider-backed component + - a browser-state-backed component + - a fetched-data component + - a real page component + + Use \`App.tsx\` to inspect the real provider tree and usage patterns, but do not make a story for \`App\` when the codebase has actual page components. + + Example page story: + + ${getPageStoryExample(projectInfo)} + + ### Step 9: Verify both rendering and types + + As you work, verify the stories with Vitest: + + \`\`\`bash + npx vitest --project storybook + \`\`\` + + Also verify types so you catch missing required props, broken imports, and preview typing issues. Run the same TypeScript command the project itself uses. + + \`\`\`bash + + \`\`\` + + After verification passes, review every changed file and remove anything that is not needed for the final solution, especially debug fixes, overly broad mocks, unnecessary dependencies, and eval artifacts. 
+ + Keep iterating until: + + - every story you wrote passes + - every story you wrote has a meaningful passing \`play\` function + - the changed stories and preview setup pass the project's real TypeScript check + - the rendered output looks sensible + - the default global mocked environment is strong enough that stories do not need manual fetch overrides + - stories no longer fail because the shared preview setup and story JSX are fixed + - all passing stories have \`tags: ['ai-generated']\` in their meta + - any stories that still need work have \`tags: ['ai-generated', 'needs-work']\` in their meta + `; +} diff --git a/code/lib/cli-storybook/src/ai/setup-prompts/setup.ts b/code/lib/cli-storybook/src/ai/setup-prompts/setup.ts new file mode 100644 index 000000000000..916028087d38 --- /dev/null +++ b/code/lib/cli-storybook/src/ai/setup-prompts/setup.ts @@ -0,0 +1,295 @@ +/** + * Prompt variant: `setup` + * + * - Created: 2026-04-15 (eval iteration 1, baseline before `pattern-copy-play`) + * - Status: experimental — not the default. Selected only when the eval + * harness sets `EVAL_SETUP_PROMPT=setup`. + * - Reference eval results: + * https://github.com/search?q=is:pr label:"prompt:setup" org:storybook-tmp&type=pullrequests + * + * Update this header when iterating: bump the iteration number and link the + * latest eval run so reviewers can compare variants without spelunking git. 
+ */ +import { dedent } from 'ts-dedent'; + +import type { ProjectInfo } from '../types.ts'; + +function getTypeImportSource(projectInfo: ProjectInfo): string { + return projectInfo.framework || projectInfo.rendererPackage || '@storybook/react'; +} + +function getPreviewDecoratorExample(projectInfo: ProjectInfo): string { + const configDir = projectInfo.configDir; + + if (projectInfo.hasCsfFactoryPreview) { + return dedent` + \`\`\`tsx + // ${configDir}/preview.tsx + import '../src/index.css'; // import global styles + + import { definePreview } from 'storybook/preview'; + + export default definePreview({ + decorators: [ + (Story) => ( + + + + + + ), + ], + }); + \`\`\` + `; + } + + const typeImport = getTypeImportSource(projectInfo); + + return dedent` + \`\`\`tsx + // ${configDir}/preview.tsx + import type { Preview } from '${typeImport}'; + import '../src/index.css'; // import global styles + + const preview: Preview = { + decorators: [ + (Story) => ( + + + + + + ), + ], + }; + + export default preview; + \`\`\` + `; +} + +function getSimpleStoryExample(projectInfo: ProjectInfo): string { + if (projectInfo.hasCsfFactoryPreview) { + return dedent` + \`\`\`tsx + import preview from '#.storybook/preview'; + import { Button } from './Button'; + + const meta = preview.meta({ + title: 'AI Generated/Simple/Button', + component: Button, + tags: ['ai-generated'], + }); + + export const Default = meta.story({ + args: { + label: 'Click me', + }, + }); + + export const Disabled = meta.story({ + args: { + label: 'Disabled', + disabled: true, + }, + }); + \`\`\` + `; + } + + const typeImport = getTypeImportSource(projectInfo); + + return dedent` + \`\`\`tsx + import type { Meta, StoryObj } from '${typeImport}'; + import { Button } from './Button'; + + const meta = { + title: 'AI Generated/Simple/Button', + component: Button, + tags: ['ai-generated'], + } satisfies Meta; + + export default meta; + type Story = StoryObj; + + export const Default: Story = { + args: { + label: 
'Click me', + }, + }; + + export const Disabled: Story = { + args: { + label: 'Disabled', + disabled: true, + }, + }; + \`\`\` + `; +} + +function getNeedsWorkTagExample(projectInfo: ProjectInfo): string { + if (projectInfo.hasCsfFactoryPreview) { + return dedent` + \`\`\`ts + const meta = preview.meta({ + title: 'AI Generated/Simple/Button', + component: Button, + tags: ['ai-generated', 'needs-work'], + }); + \`\`\` + `; + } + + return dedent` + \`\`\`ts + const meta = { + title: 'AI Generated/Simple/Button', + component: Button, + tags: ['ai-generated', 'needs-work'], + } satisfies Meta; + \`\`\` + `; +} + +export function instructions(projectInfo: ProjectInfo): string { + const configDir = projectInfo.configDir; + + return dedent` + Attention: The following instructions must be followed in order to successfully set up Storybook in this project. Do not skip steps or attempt to do them out of order. + + Your goal is to make Storybook fully functional in this project by analyzing the codebase, + configuring the preview with the right decorators, and writing example stories for 9 components. + + Work through these steps in order. After each story file, run Vitest to verify it renders. + If the test fails, read the error, fix the issue, and re-run until it passes before moving on. + + ### Step 1: Analyze the codebase + + Before writing any stories, understand what the components need to render: + + - Scan the project for context providers, theme systems, routers, stores, and i18n setups. + Look at the app's entry point (e.g. \`App.tsx\`, \`main.tsx\`, \`layout.tsx\`) to see what + providers wrap the component tree. + - Identify global CSS or style imports required for components to look correct. + - Note any path aliases configured in tsconfig or bundler config. + - Read \`${configDir}/main.ts\` (or \`main.js\`) to find the \`stories\` glob patterns. + Your story files must match those patterns to be picked up by Storybook. 
+ + ### Step 2: Configure \`${configDir}/preview.tsx\` with decorators + + Add decorators that wrap every story with the providers your components need. + Without this, most non-trivial components will crash. + + ${getPreviewDecoratorExample(projectInfo)} + + Common decorators to add: + + - **Theme providers** (e.g. ThemeProvider, MUI ThemeProvider, styled-components, Tailwind) + - **Router** (e.g. MemoryRouter, BrowserRouter mock) + - **State stores** (e.g. Redux Provider, Zustand, Jotai) + - **i18n** (e.g. IntlProvider, I18nextProvider) + - **Global CSS** — import global stylesheets at the top of \`preview.tsx\` + + ### Step 3: Write stories for 9 components + + Pick 9 real components from the codebase, 3 of each complexity level. + Use the title prefix \`AI Generated//\` so they are grouped + together in the Storybook sidebar. + + **Simple (3 components)** — Presentational with few props, no internal state. + Examples: Button, Badge, Avatar, Icon, Label, Chip. + Title format: \`AI Generated/Simple/\` + + **Medium (3 components)** — Multiple visual variants or composed from simpler components. + Examples: Card, Alert, Input, Select, Tooltip, Tabs. + Title format: \`AI Generated/Medium/\` + + **Complex (3 components)** — Internal state, side effects, or deep composition. + Examples: Modal, DataTable, Form, Dropdown, Accordion, Sidebar. + Title format: \`AI Generated/Complex/\` + + For each component, create a \`.stories.tsx\` file next to the component. + Each file must have at least 2 story exports covering the component's main states. + Make sure the file location and naming matches the \`stories\` patterns in \`${configDir}/main.ts\`. 
+ + #### Story tags + + Every story meta must include the \`ai-generated\` tag to identify AI-created stories: + + ${getSimpleStoryExample(projectInfo)} + + If a story could not be fully fixed after the self-healing loop (the test still fails + or the rendering is incomplete), add the \`needs-work\` tag alongside \`ai-generated\`: + + ${getNeedsWorkTagExample(projectInfo)} + + Rules: + + - Every named export is a story. Use \`args\` to set props. + - Provide all required props via \`args\` — check the component's types. + - If a component needs per-story decorators (beyond the global ones), add them in the meta. + - Do NOT use \`any\` types. Use the component's prop types for type safety. + + Reference: https://storybook.js.org/docs/writing-stories + + ### Step 4: Verify each story with Vitest + + After writing each story file, immediately verify it: + + \`\`\`bash + npx vitest --project storybook + \`\`\` + + **Self-healing loop — repeat for every story file:** + + 1. Write/update the story file + 2. Run \`npx vitest --project storybook \` + 3. If it fails: read the error output carefully + - Missing provider → add a decorator in \`${configDir}/preview.tsx\` or in the story meta + - Missing prop → add the required prop to \`args\` + - Import error → fix the import path + - CSS/asset error → add static dirs or import the stylesheet + 4. Fix the issue and go back to step 2 + 5. Once the test passes, move to the next component + + After all 9 story files pass individually, run the full suite: + + \`\`\`bash + npx vitest --project storybook + \`\`\` + + Once all stories pass, run a full Storybook build as a final check: + + \`\`\`bash + npx storybook build + \`\`\` + + If the build fails, fix the issue before finishing. + + Finally, run \`npx storybook doctor\` to check for common issues + (version mismatches, duplicated deps, etc.) and fix anything it reports. 
+ + ### Checklist + + - [ ] Analyzed codebase for providers, global styles, and path aliases + - [ ] Read story patterns from \`${configDir}/main.ts\` + - [ ] Configured \`${configDir}/preview.tsx\` with necessary decorators + - [ ] Simple component 1: story written and passing + - [ ] Simple component 2: story written and passing + - [ ] Simple component 3: story written and passing + - [ ] Medium component 1: story written and passing + - [ ] Medium component 2: story written and passing + - [ ] Medium component 3: story written and passing + - [ ] Complex component 1: story written and passing + - [ ] Complex component 2: story written and passing + - [ ] Complex component 3: story written and passing + - [ ] Full Vitest suite passes: \`npx vitest --project storybook\` + - [ ] \`npx storybook build\` succeeds + - [ ] \`npx storybook doctor\` reports no remaining issues + - [ ] All passing stories have \`tags: ['ai-generated']\` in their meta + - [ ] Any stories that still need work have \`tags: ['ai-generated', 'needs-work']\` in their meta + `; +} diff --git a/code/lib/cli-storybook/src/ai/types.ts b/code/lib/cli-storybook/src/ai/types.ts index 3a9a1b52be28..93a3ea88aabd 100644 --- a/code/lib/cli-storybook/src/ai/types.ts +++ b/code/lib/cli-storybook/src/ai/types.ts @@ -4,6 +4,8 @@ export interface AiSetupOptions { configDir?: string; packageManager?: string; output?: string; + /** Populated from the program-level `--disable-telemetry` flag (defaults from `STORYBOOK_DISABLE_TELEMETRY`). 
*/ + disableTelemetry?: boolean; } export interface ProjectInfo { diff --git a/scripts/eval/README.md b/scripts/eval/README.md index b98d8b7be0fd..6d983a94282f 100644 --- a/scripts/eval/README.md +++ b/scripts/eval/README.md @@ -4,7 +4,7 @@ The eval harness benchmarks how well AI coding agents (Claude, Codex) can set up ## Prerequisites -- `**gh` CLI** — installed and authenticated (`gh auth login`) +- **`gh` CLI** — installed and authenticated (`gh auth login`) - **Claude Code CLI** and/or **Codex CLI** — installed with an active subscription ## How it works @@ -13,7 +13,7 @@ The eval harness benchmarks how well AI coding agents (Claude, Codex) can set up The system forms a cycle: -1. `**sync-baselines.ts`** pushes a canonical `.storybook` config to each benchmark repo so every trial starts from the same known-good baseline. +1. `**sync-baselines.ts**` pushes a canonical `.storybook` config to each benchmark repo so every trial starts from the same known-good baseline. 2. `**eval.ts**` (single trial) or `**run-batch.ts**` (batch) creates a git worktree from a benchmark repo, runs an agent inside it, grades the output, and publishes a draft PR with structured result data. 3. `**collect-pr-data.ts**` scrapes those draft PRs via the GitHub API and loads the results into a local SQLite database for analysis. @@ -31,7 +31,7 @@ Each trial follows this lifecycle: All commands run from the repo root. ```sh -# Prompt file is required (scripts/eval/prompts/{name}.md). Example: pattern-copy-play +# Prompt variant is required. Example: pattern-copy-play (the CLI default) node scripts/eval/eval.ts -p mealdrop --prompt pattern-copy-play # Specific agent @@ -268,17 +268,39 @@ To benchmark a new app, register it in the harness and sync baselines. Follow th ## Prompts -Prompts are markdown files in `scripts/eval/prompts/` that tell the agent what to do during a trial. The `--prompt` flag selects one by filename (without `.md`). +The eval mirrors the real user flow exactly: + +1. 
A real user copies the "Set up Storybook with AI" prompt from the Storybook UI — a one-line nudge (`AI_SETUP_PROMPT`) that just says _"Run `npx storybook ai setup` and follow its instructions precisely."_ +2. The user pastes that into their AI agent. +3. The **agent** runs `npx storybook ai setup` itself as a tool call. +4. The agent reads the resulting project-aware markdown and follows it. + +The harness hands steps (1) and (2) to the trial agent as its task. Eval starts at step (3). + +### How variant selection works + +Prompt variants live in [`code/lib/cli-storybook/src/ai/setup-prompts/`](../../code/lib/cli-storybook/src/ai/setup-prompts/). Each variant is a self-contained `.ts` file that exports an `instructions(projectInfo)` function. The registry in `setup-prompts/index.ts` lists every variant. + +The eval selects a variant by injecting the `EVAL_SETUP_PROMPT` env var into the agent's spawn environment. When the agent later runs `npx storybook ai setup`, the CLI reads that env var and returns the matching variant. Real users never set this env var, so they always get the default (`pattern-copy-play`). + +``` +eval.ts --prompt setup + → run-trial.ts calls driver.execute({ env: { EVAL_SETUP_PROMPT: 'setup' } }) + → agent spawns with that env + → agent's `npx storybook ai setup` tool call inherits EVAL_SETUP_PROMPT + → CLI's getPrompts() picks the 'setup' variant +``` ### Available prompts -- `**pattern-copy-play**` — analyze the codebase, copy real usage patterns, configure preview with providers and MSW mocks, write ~10 story files with play functions, verify each with Vitest. -- `**setup**` — structured step-by-step: analyze, configure preview, write 9 stories (3 simple / 3 medium / 3 complex), verify each with Vitest. +- `**pattern-copy-play**` _(default)_ — analyze the codebase, copy real usage patterns, configure preview with providers and MSW mocks, write ~10 story files with play functions, verify each with Vitest.
This is the only prompt users ever see when they run `npx storybook ai setup`. +- `**setup**` — structured step-by-step: analyze, configure preview, write 9 stories (3 simple / 3 medium / 3 complex), verify each with Vitest. Available only to the eval harness for A/B comparison against the default. -### Writing a new prompt +### Adding a new prompt variant -1. Create a markdown file in `scripts/eval/prompts/`, e.g. `my-strategy.md`. -2. Write the instructions the agent should follow. The prompt is passed directly to the agent as its task. -3. Use it: `node scripts/eval/eval.ts -p mealdrop --prompt my-strategy` +1. Create `code/lib/cli-storybook/src/ai/setup-prompts/<name>.ts`. Make it fully self-contained — keep its own `getTypeImportSource`, code-example helpers, and any other private utilities so changing one variant can never accidentally change another. Duplication is deliberate here. +2. Export an `instructions(projectInfo: ProjectInfo): string` function. +3. Register it in `code/lib/cli-storybook/src/ai/setup-prompts/index.ts` by adding an entry to `PROMPT_BUILDERS`. +4. Use it from the eval: `node scripts/eval/eval.ts -p mealdrop --prompt <name>`. -The prompt should tell the agent how to analyze the codebase, configure `.storybook/preview.ts`, write story files matching the `stories` glob, and verify with `npx vitest --project storybook`. +To promote a variant to be the default users see, change `DEFAULT_PROMPT_NAME` in the same registry file. 
diff --git a/scripts/eval/eval.ts b/scripts/eval/eval.ts index e6348614065e..3e5d3b27fcff 100644 --- a/scripts/eval/eval.ts +++ b/scripts/eval/eval.ts @@ -31,7 +31,7 @@ import { } from './lib/agents/config.ts'; import { prepareTrial } from './lib/prepare-trial.ts'; import { PROJECTS } from './lib/projects.ts'; -import { runTrial, type TrialConfig } from './lib/run-trial.ts'; +import { captureAiSetupMarkdown, runTrial, type TrialConfig } from './lib/run-trial.ts'; import { captureEnvironment, createLogger, @@ -84,7 +84,7 @@ const argsSchema = z if (prompt === '') { ctx.addIssue({ code: z.ZodIssueCode.custom, - message: `Specify --prompt (markdown file in scripts/eval/prompts/). Example: --prompt ${EXAMPLE_PROMPT_BASENAME}. Run with --list-prompts to see available names.`, + message: `Specify --prompt . Example: --prompt ${EXAMPLE_PROMPT_BASENAME}. Run with --list-prompts to see available names.`, path: ['prompt'], }); } @@ -101,7 +101,7 @@ const evalOptions = { effort: { type: 'string' as const, short: 'e', description: 'Effort level' }, prompt: { type: 'string' as const, - description: `Prompt template name — required with -p (file: prompts/{name}.md; e.g. ${EXAMPLE_PROMPT_BASENAME})`, + description: `Prompt variant name — required with -p (e.g. ${EXAMPLE_PROMPT_BASENAME}). 
Use --list-prompts to see available names.`, }, verbose: { type: 'boolean' as const, short: 'v', description: 'Enable verbose output' }, manual: { @@ -192,12 +192,21 @@ if (args.manual) { const promptPath = join(workspace.resultsDir, 'prompt.md'); await writeFile(promptPath, prompt); - const cliCommand = buildManualCommand(variant, promptPath); + const setupPromptPath = join(workspace.resultsDir, 'setup-prompt.md'); + const setupPromptContent = await captureAiSetupMarkdown( + workspace.projectPath, + promptName, + logger + ); + await writeFile(setupPromptPath, setupPromptContent); + + const cliCommand = buildManualCommand(variant, promptPath, promptName); logger.log(pc.bold('\n── Manual mode ──')); - logger.log(`\n Trial dir: ${pc.cyan(workspace.trialDir)}`); - logger.log(` Project dir: ${pc.cyan(workspace.projectPath)}`); - logger.log(` Prompt file: ${pc.cyan(promptPath)}`); + logger.log(`\n Trial dir: ${pc.cyan(workspace.trialDir)}`); + logger.log(` Project dir: ${pc.cyan(workspace.projectPath)}`); + logger.log(` Prompt file: ${pc.cyan(promptPath)}`); + logger.log(` Setup prompt: ${pc.cyan(setupPromptPath)}`); logger.log(pc.bold('\nRun the agent yourself:\n')); logger.log(` ${pc.green('cd')} ${workspace.projectPath}`); logger.log(` ${pc.green(cliCommand)}\n`); @@ -241,13 +250,16 @@ function inferAgent(model: string): AgentId { throw new Error(`No agent found for model: ${model}`); } -function buildManualCommand(variant: AgentVariant, promptPath: string): string { +function buildManualCommand(variant: AgentVariant, promptPath: string, promptName: string): string { + // EVAL_SETUP_PROMPT must be in the env the agent inherits, so that the + // agent's own `npx storybook ai setup` tool call picks the right variant. + const envPrefix = `EVAL_SETUP_PROMPT=${promptName} `; const promptArg = `"$(cat ${promptPath})"`; if (variant.agent === 'claude') { const sdkModel = AGENTS.claude.sdkModelIds[variant.model] ?? 
variant.model; - return `claude --model ${sdkModel} ${promptArg}`; + return `${envPrefix}claude --model ${sdkModel} ${promptArg}`; } - return `codex --model ${variant.model} --reasoning-effort ${variant.effort} ${promptArg}`; + return `${envPrefix}codex --model ${variant.model} --reasoning-effort ${variant.effort} ${promptArg}`; } function toVariant(args: z.infer): AgentVariant { diff --git a/scripts/eval/lib/agents/claude-code.ts b/scripts/eval/lib/agents/claude-code.ts index a403abb1933e..7113a43d4416 100644 --- a/scripts/eval/lib/agents/claude-code.ts +++ b/scripts/eval/lib/agents/claude-code.ts @@ -13,7 +13,14 @@ import type { Logger } from '../utils.ts'; export const claudeAgent: AgentDriver = { name: 'claude', - async execute({ prompt, projectPath, variant, logger, verbose }): Promise { + async execute({ + prompt, + projectPath, + variant, + logger, + verbose, + env, + }): Promise { if (variant.agent !== 'claude') { throw new Error(`Claude driver received unsupported variant: ${variant.agent}`); } @@ -37,6 +44,7 @@ export const claudeAgent: AgentDriver = { cwd: projectPath, env: { ...process.env, + ...env, STORYBOOK_DISABLE_TELEMETRY: '1', }, allowedTools: [...settings.allowedTools], diff --git a/scripts/eval/lib/agents/codex.ts b/scripts/eval/lib/agents/codex.ts index ae10f2e659d0..c3a75c00e488 100644 --- a/scripts/eval/lib/agents/codex.ts +++ b/scripts/eval/lib/agents/codex.ts @@ -11,7 +11,14 @@ import { countLines } from '../output-preview.ts'; export const codexAgent: AgentDriver = { name: 'codex', - async execute({ prompt, projectPath, variant, logger, verbose }): Promise { + async execute({ + prompt, + projectPath, + variant, + logger, + verbose, + env, + }): Promise { if (variant.agent !== 'codex') { throw new Error(`Codex driver received unsupported variant: ${variant.agent}`); } @@ -23,6 +30,7 @@ export const codexAgent: AgentDriver = { const codex = new Codex({ env: { ...process.env, + ...env, STORYBOOK_DISABLE_TELEMETRY: '1', }, }); diff --git 
a/scripts/eval/lib/agents/config.ts b/scripts/eval/lib/agents/config.ts index 1f49fef48c7a..71ca3986b125 100644 --- a/scripts/eval/lib/agents/config.ts +++ b/scripts/eval/lib/agents/config.ts @@ -46,6 +46,14 @@ export interface AgentExecuteParams { resultsDir: string; logger: Logger; verbose?: boolean; + /** + * Extra env vars to forward to the agent's spawn. Merged on top of + * `process.env` and under the driver's fixed entries (e.g. + * `STORYBOOK_DISABLE_TELEMETRY`). Used by the harness to inject + * `EVAL_SETUP_PROMPT` so that the agent's own `npx storybook ai setup` + * tool call resolves to the selected prompt variant. + */ + env?: Record; } export interface AgentDriver { diff --git a/scripts/eval/lib/grade.ts b/scripts/eval/lib/grade.ts index c6b1bb1c9888..592171ec7086 100644 --- a/scripts/eval/lib/grade.ts +++ b/scripts/eval/lib/grade.ts @@ -180,6 +180,15 @@ export async function grade( logger, }); + const cssCheck = storyRenderRun.summary?.cssCheck ?? 'not-run'; + if (cssCheck === 'pass') { + logger.logSuccess('CssCheck story passed'); + } else if (cssCheck === 'fail') { + logger.logError('CssCheck story failed'); + } else { + logger.logError('CssCheck story missing or not run'); + } + const baselinePreviewRun = await withBaselinePreviewEnvironment({ repoRoot, baselineCommit, diff --git a/scripts/eval/lib/publish-trial.test.ts b/scripts/eval/lib/publish-trial.test.ts index db700424b8ad..b8eb583859aa 100644 --- a/scripts/eval/lib/publish-trial.test.ts +++ b/scripts/eval/lib/publish-trial.test.ts @@ -173,6 +173,7 @@ describe('publishTrialBranch', () => { total: 8, passed: 4, storyFiles: 3, + cssCheck: 'not-run' as const, }, buildSuccess: true, typeCheckErrors: 0, @@ -188,6 +189,7 @@ describe('publishTrialBranch', () => { total: 8, passed: 6, storyFiles: 3, + cssCheck: 'not-run' as const, }, }, score: { @@ -247,6 +249,7 @@ describe('publishTrialBranch', () => { expect(prBody).toContain('Ghost stories after: `3/4 (75%)`'); expect(prBody).toContain('Vitest 
pass rate before preview changes: `4/8 (50%)`'); expect(prBody).toContain('Vitest pass rate after preview changes: `6/8 (75%)`'); + expect(prBody).toContain('CssCheck: `not-run`'); expect(prBody).toContain('[.storybook/eval-results/data.json]('); expect(prBody).toContain('Full prompt'); expect(prBody.match(/
/g)).toHaveLength(1); diff --git a/scripts/eval/lib/publish-trial.ts b/scripts/eval/lib/publish-trial.ts index ea9104dc8b66..330e6189fb57 100644 --- a/scripts/eval/lib/publish-trial.ts +++ b/scripts/eval/lib/publish-trial.ts @@ -260,6 +260,7 @@ function renderPrBody(opts: { branch: string; data: EvalData }) { `- Ghost stories after: \`${postAgentGhostStories}\``, `- Vitest pass rate before preview changes: \`${baselinePreviewStories}\``, `- Vitest pass rate after preview changes: \`${postAgentStoryRender}\``, + `- CssCheck: \`${opts.data.grade.storyRender?.cssCheck ?? 'not-run'}\``, `- Duration: \`${formatDuration(opts.data.execution.duration)}\``, `- Cost: \`${formatCost(opts.data.execution.cost)}\``, `- Raw data: [${getEvalResultsRelativePath('data.json', opts.data.project.projectDir)}](${dataUrl})`, diff --git a/scripts/eval/lib/run-trial.test.ts b/scripts/eval/lib/run-trial.test.ts index 6022b45b5e62..957492069b2d 100644 --- a/scripts/eval/lib/run-trial.test.ts +++ b/scripts/eval/lib/run-trial.test.ts @@ -45,7 +45,15 @@ vi.mock('./agents/claude-code', () => ({ vi.mock('./agents/codex', () => ({ codexAgent: { name: 'codex', execute: vi.fn() }, })); +vi.mock('tinyexec', () => ({ + x: vi.fn().mockResolvedValue({ + exitCode: 0, + stdout: '# Storybook Setup\n\nFull project-aware instructions...', + stderr: '', + }), +})); +import { x } from 'tinyexec'; import { claudeAgent } from './agents/claude-code.ts'; import { collectGhostStoriesGrade, grade } from './grade.ts'; import { prepareTrial } from './prepare-trial.ts'; @@ -158,13 +166,28 @@ describe('runTrial pipeline', () => { const params = vi.mocked(claudeAgent.execute).mock.calls[0][0]; expect(params).toMatchObject({ - prompt: expect.stringContaining('set up Storybook'), + prompt: expect.stringContaining('npx storybook ai setup'), projectPath: TMP, variant: { agent: 'claude', model: 'sonnet-4.6', effort: 'high' }, resultsDir: join(TMP, '.storybook', 'eval-results'), + env: { EVAL_SETUP_PROMPT: 'setup' }, }); 
expect(params.logger).toBeDefined(); + expect(vi.mocked(x)).toHaveBeenCalledWith( + 'npx', + ['storybook', 'ai', 'setup'], + expect.objectContaining({ + nodeOptions: expect.objectContaining({ + cwd: TMP, + env: expect.objectContaining({ + EVAL_SETUP_PROMPT: 'setup', + STORYBOOK_DISABLE_TELEMETRY: '1', + }), + }), + }) + ); + const gradeWorkspace = vi.mocked(grade).mock.calls[0][0]; expect(gradeWorkspace).toMatchObject({ baselineCommit: 'deadbeef', @@ -194,7 +217,7 @@ describe('runTrial pipeline', () => { }); }); - it('writes data.json and prompt.md to results dir', async () => { + it('writes data.json, prompt.md, and setup-prompt.md to results dir', async () => { setupMocks(); await runTrial(baseConfig); @@ -217,6 +240,7 @@ describe('runTrial pipeline', () => { }, prompt: { name: 'setup', + content: expect.stringContaining('Full project-aware instructions'), }, artifacts: { buildOutput: { path: '.storybook/eval-results/build-output.txt', success: true }, @@ -227,7 +251,7 @@ describe('runTrial pipeline', () => { }, docs: { transcript: { - prompt: expect.stringContaining('set up Storybook'), + prompt: expect.stringContaining('Full project-aware instructions'), }, }, }); @@ -235,7 +259,11 @@ describe('runTrial pipeline', () => { expect(data).not.toHaveProperty('artifacts.screenshotOutput'); const promptContent = readFileSync(join(resultsDir, 'prompt.md'), 'utf-8'); - expect(promptContent).toContain('set up Storybook'); + expect(promptContent).toContain('npx storybook ai setup'); + + const setupPromptContent = readFileSync(join(resultsDir, 'setup-prompt.md'), 'utf-8'); + expect(setupPromptContent).toContain('Full project-aware instructions'); + expect(() => readFileSync(join(resultsDir, 'summary.json'), 'utf-8')).toThrow(); expect(() => readFileSync(join(resultsDir, 'transcript.json'), 'utf-8')).toThrow(); }); @@ -408,6 +436,7 @@ function setupMocks(overrides?: { total: 6, passed: 2, storyFiles: 3, + cssCheck: 'not-run' as const, }, buildSuccess, typeCheckErrors, @@ 
-425,6 +454,7 @@ function setupMocks(overrides?: { total: 6, passed: 4, storyFiles: 3, + cssCheck: 'pass' as const, }, } : {}), diff --git a/scripts/eval/lib/run-trial.ts b/scripts/eval/lib/run-trial.ts index 55e2040bcdaf..03152c03a1c2 100644 --- a/scripts/eval/lib/run-trial.ts +++ b/scripts/eval/lib/run-trial.ts @@ -1,5 +1,6 @@ import { writeFile } from 'node:fs/promises'; import { join } from 'pathe'; +import { x } from 'tinyexec'; import type { Logger } from './utils.ts'; import type { AgentId, AgentDriver, AgentVariant } from './agents/config.ts'; import type { Project } from './projects.ts'; @@ -22,7 +23,7 @@ export interface TrialConfig { project: Project; /** Agent, model, and effort level. */ variant: AgentVariant; - /** Prompt name — maps to `prompts/{name}.md` (e.g. "setup"). */ + /** Prompt variant name — registered in `code/lib/cli-storybook/src/ai/prompts/` (e.g. "pattern-copy-play"). */ prompt: string; /** Log agent messages to stdout. */ verbose?: boolean; @@ -64,11 +65,24 @@ export async function runTrial(config: TrialConfig, logger?: Logger): Promise { + try { + const result = await x('npx', ['storybook', 'ai', 'setup'], { + throwOnError: false, + timeout: 60_000, + nodeOptions: { + cwd: projectPath, + env: { + ...process.env, + EVAL_SETUP_PROMPT: promptName, + STORYBOOK_DISABLE_TELEMETRY: '1', + }, + }, + }); + + if (result.exitCode !== 0) { + log.logError( + `Failed to capture ai setup markdown (exit ${result.exitCode}). Falling back to nudge-only record.` + ); + log.logError(result.stderr.trim() || result.stdout.trim()); + return ''; + } + + return result.stdout.trim(); + } catch (error) { + log.logError( + `Failed to capture ai setup markdown (${error instanceof Error ? error.message : String(error)}). 
Falling back to nudge-only record.` + ); + return ''; + } +} diff --git a/scripts/eval/lib/story-render.ts b/scripts/eval/lib/story-render.ts index a7c4e86b7c90..7a2ec831f806 100644 --- a/scripts/eval/lib/story-render.ts +++ b/scripts/eval/lib/story-render.ts @@ -13,6 +13,7 @@ export interface StoryRenderGrade { total: number; passed: number; storyFiles: number; + cssCheck: 'pass' | 'fail' | 'not-run'; } export interface StoryRenderRunResult { @@ -82,6 +83,7 @@ export async function runStoryRenderPass(opts: { total: 0, passed: 0, storyFiles: 0, + cssCheck: 'not-run' as const, }, }; } @@ -209,6 +211,7 @@ async function readStoryRenderSummary(reportPath: string, storyFiles: number) { total: parsed.total, passed: parsed.passed, storyFiles, + cssCheck: parsed.cssCheck, } satisfies StoryRenderGrade; } diff --git a/scripts/eval/lib/utils.test.ts b/scripts/eval/lib/utils.test.ts index db1d8e6ece53..bb197efcfd98 100644 --- a/scripts/eval/lib/utils.test.ts +++ b/scripts/eval/lib/utils.test.ts @@ -98,40 +98,33 @@ describe('formatReadableUtcTimestamp', () => { }); describe('listPrompts', () => { - it('lists available prompt names', () => { + it('mirrors the CLI prompt registry', () => { const prompts = listPrompts(); expect(prompts).toContain('pattern-copy-play'); - expect(prompts).not.toContain('pattern-copy'); expect(prompts).toContain('setup'); + expect(prompts).not.toContain('pattern-copy'); }); - it('returns only names without .md extension', () => { - for (const name of listPrompts()) { - expect(name).not.toContain('.md'); - } + it('includes the default/example prompt', () => { + expect(listPrompts()).toContain(EXAMPLE_PROMPT_BASENAME); }); }); describe('loadPrompt', () => { - it('loads setup prompt by name', () => { - const prompt = loadPrompt('setup'); - expect(prompt).toContain('Storybook'); - expect(prompt).toContain('### Step 1'); - }); - - it('loads the play-driven pattern-copy prompt by name', () => { + it('returns the nudge string the agent receives (not the 
resolved instructions)', () => { const prompt = loadPrompt(EXAMPLE_PROMPT_BASENAME); - expect(prompt).toContain('play function'); - expect(prompt).toContain('The purpose of the `play` function is to prove'); + expect(prompt).toContain('npx storybook ai setup'); + expect(prompt).not.toContain('### Step 1'); }); - it('throws for unknown prompt', () => { + it('rejects unknown prompt names', () => { expect(() => loadPrompt('nonexistent-prompt-xyz')).toThrow('Prompt not found'); }); - it('returns trimmed content', () => { - const prompt = loadPrompt(EXAMPLE_PROMPT_BASENAME); - expect(prompt).toBe(prompt.trim()); + it('accepts every registered prompt name', () => { + for (const name of listPrompts()) { + expect(() => loadPrompt(name)).not.toThrow(); + } }); }); diff --git a/scripts/eval/lib/utils.ts b/scripts/eval/lib/utils.ts index a67739842cbc..ab8c16a79d6e 100644 --- a/scripts/eval/lib/utils.ts +++ b/scripts/eval/lib/utils.ts @@ -1,8 +1,13 @@ -import { readFileSync, existsSync, readdirSync } from 'node:fs'; -import { basename, join, resolve, sep } from 'node:path'; +import { join, resolve, sep } from 'node:path'; import pc from 'picocolors'; import { x } from 'tinyexec'; +import { AI_SETUP_PROMPT } from '../../../code/core/src/shared/constants/ai-prompts.ts'; +import { + DEFAULT_PROMPT_NAME, + PROMPT_NAMES, +} from '../../../code/lib/cli-storybook/src/ai/setup-prompts/index.ts'; + export interface Logger { log: (msg: string) => void; logStep: (msg: string) => void; @@ -14,9 +19,8 @@ export const REPO_ROOT = resolve(import.meta.dirname, '..', '..', '..'); export const EVAL_ROOT = resolve(REPO_ROOT, '..', 'storybook-eval'); export const REPOS_DIR = resolve(EVAL_ROOT, 'repos'); export const TRIALS_DIR = resolve(EVAL_ROOT, 'trials'); -export const PROMPTS_DIR = resolve(import.meta.dirname, '..', 'prompts'); -/** Basename (no `.md`) used in docs and tests when a concrete prompt must be named. 
*/ -export const EXAMPLE_PROMPT_BASENAME = 'pattern-copy-play'; +/** Name used in docs and tests when a concrete prompt must be named. Tracks the CLI default. */ +export const EXAMPLE_PROMPT_BASENAME = DEFAULT_PROMPT_NAME; export const NODE_EVAL_TRIAL_SCRIPT = 'scripts/eval/eval.ts' as const; export const NODE_EVAL_RUN_BATCH_SCRIPT = 'scripts/eval/run-batch.ts' as const; export const NODE_EVAL_SYNC_BASELINES_SCRIPT = 'scripts/eval/sync-baselines.ts' as const; @@ -142,22 +146,24 @@ export function formatTable(headers: string[], rows: string[][]): string { ].join('\n'); } -/** Load a prompt by name from prompts/{name}.md. */ +/** + * Returns the exact nudge string a real user copies from the Storybook UI — + * "Run `npx storybook ai setup` and follow its instructions precisely." The + * AGENT then runs `ai setup` itself as a tool call, mirroring the real user + * flow. The harness selects a prompt variant via the `EVAL_SETUP_PROMPT` env + * var on the agent's spawn (not here); this function only validates the name. + */ export function loadPrompt(name: string): string { const available = listPrompts(); if (!available.includes(name)) { throw new Error(`Prompt not found: ${name}\nAvailable: ${available.join(', ')}`); } - const file = resolve(PROMPTS_DIR, `${name}.md`); - return readFileSync(file, 'utf-8').trim(); + return AI_SETUP_PROMPT; } -/** List available prompt names. */ +/** List available prompt names. Mirrors the builder registry in the CLI. 
*/ export function listPrompts(): string[] { - if (!existsSync(PROMPTS_DIR)) return []; - return readdirSync(PROMPTS_DIR) - .filter((f) => f.endsWith('.md')) - .map((f) => basename(f, '.md')); + return [...PROMPT_NAMES]; } export interface EvalEnvironment { diff --git a/scripts/eval/prompts/pattern-copy-play.md b/scripts/eval/prompts/pattern-copy-play.md deleted file mode 100644 index a8ce9b20bd94..000000000000 --- a/scripts/eval/prompts/pattern-copy-play.md +++ /dev/null @@ -1,529 +0,0 @@ -# Pattern-Copy Storybook Setup With Play Functions - -Your goal is to make Storybook fully functional in this project by analyzing the codebase, -configuring the preview with the right decorators, and writing stories for some components. - -The end state should be a Storybook where any component — from a small button to a full page — can be added without story-specific workarounds. All necessary providers, CSS, browser state, and network mocks should live in the shared preview so that new stories only need the component import and a render call. - -After each created story, run Vitest to verify it renders. -If the test fails, read the error, fix the issue, and re-run until it passes before moving on. - -- copy real patterns from the codebase -- keep the app code unchanged -- put the default setup in `.storybook/preview.tsx` -- keep app mocking and runtime setup in `.storybook/preview.tsx`, not in the stories - -### Step 1: Analyze the codebase - -Read enough of the app to understand the full runtime environment before writing any stories. - -Do not stop at `main.tsx` or `App.tsx`. 
-Follow imports into providers, pages, hooks, and shared components until you know: -- which providers exist -- which CSS files are injected -- which queries fetch data -- which browser-state reads happen -- which portals and portal roots exist -- which pages and components show the real usage patterns - -Example of what to copy: - -```tsx -// src/main.tsx -import './index.css'; -import App from './App'; -import { SessionProvider } from './contexts/SessionContext'; - -createRoot(document.getElementById('root')!).render( - - - -); -``` - -That means Storybook should copy: -- the `index.css` import -- the `SessionProvider` -- the same provider order - -Example of tracing the app deeper: - -```tsx -// src/App.tsx -function App() { - const { products, loadMoreProducts } = useProducts(); - const { currentUser, signOut } = useSession(); - // ... -} -``` - -```ts -// src/hooks/useProducts.ts -const response = await fetch(apiBaseUrl + '/products?page=1'); -``` - -```ts -// src/hooks/useTheme.ts -const savedTheme = localStorage.getItem('theme'); -``` - -That means the default Storybook setup should discover and prepare: -- provider state -- MSW handlers for queries -- browser-state values that are actually read during render - -## 2. Build one default app environment in preview - -Set up Storybook once so most stories work without story-specific setup. - -Start with the smallest faithful environment: -- the real provider tree -- the real root CSS -- seeded browser state if the app reads it during render -- MSW for network/data queries - -It is fine to seed browser state such as `localStorage`, `sessionStorage`, and cookies when the app reads them during render. -Seed only the specific app-owned keys and values you need. -Do not clear all `localStorage`, `sessionStorage`, or cookies, and do not reset Storybook's own state. -Do not mock or redefine the browser runtime itself. -The stories run in Vitest browser mode, so the real browser environment should already exist. 
- -Example: - -```tsx -// .storybook/preview.tsx -import type { Preview } from '@storybook/react-vite'; -import MockDate from 'mockdate'; -import '../src/index.css'; -import { SessionProvider } from '../src/contexts/SessionContext'; - -const preview: Preview = { - decorators: [ - (Story) => ( - - - - ), - ], - async beforeEach() { - localStorage.setItem('theme', 'dark'); - localStorage.setItem('sidebar:open', 'true'); - MockDate.set('2024-04-01T12:00:00Z'); - }, -}; - -export default preview; -``` - -Use this same idea for: -- providers -- root CSS -- browser state -- dates, and if the app logic depends on them during render then always use `mockdate` - -Example with the `mockdate` package: - -```tsx -import type { Preview } from '@storybook/react-vite'; -import MockDate from 'mockdate'; - -const preview: Preview = { - async beforeEach() { - MockDate.set('2024-04-01T12:00:00Z'); - }, -}; - -export default preview; -``` - -## 3. Support portals with preview-body.html - -If the app uses portals, copy that setup into Storybook too. - -Look for patterns like: -- `createPortal(...)` -- modal, dialog, drawer, popover, tooltip, toast, or dropdown portal components -- hard-coded roots such as `#portal-root`, `#modal-root`, `#drawer-root`, or `#toast-root` - -Example of what to copy: - -```tsx -// real component -return createPortal( - , - document.getElementById('portal-root')! -); -``` - -That means Storybook should create the same portal root in `.storybook/preview-body.html`: - -```html - -
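-``` - -If the app uses multiple portal roots, create all of them there: - -```html -<!-- .storybook/preview-body.html --> -<div id="modal-root"></div>
-<div id="drawer-root"></div> -<div id="toast-root"></div>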
-``` - -If a library portals directly to `document.body`, do not add extra roots for it. -Make sure the copied page shell, CSS, and layout still allow overlays, fixed positioning, and z-index stacking to render correctly. - -## 4. Mock side effects globally - -All network/data queries should be handled by the default Storybook environment. - -- Always use `msw-storybook-addon` for query mocking. -- If you introduce MSW, run `npx msw init ./public --save` to create the worker file. -- Make sure Storybook serves `./public` as a static dir so `mockServiceWorker.js` is available. -- Do not mock `fetch` directly. -- Network/data queries should return deterministic mock data. -- If you need to change dependencies, first check the lockfile and use that package manager for the change. - -Example of copying a real fetch pattern into shared handlers: - -```ts -// real app hook -const response = await fetch( - apiBaseUrl + - '/products?' + - new URLSearchParams({ - page: '1', - sort: 'featured', - }) -); -``` - -```ts -// .storybook/msw-handlers.ts -import { http, HttpResponse } from 'msw'; - -export const mswHandlers = { - products: [ - http.get('https://api.example.com/products', () => - HttpResponse.json({ - items: [ - { - id: 'product-1', - name: 'Example product', - description: 'Mock product description', - imageUrl: 'https://images.example.com/product.jpg', - price: 42, - }, - ], - }) - ), - ], -}; -``` - -```tsx -// .storybook/preview.tsx -import type { Preview } from '@storybook/react-vite'; -import { initialize, mswLoader } from 'msw-storybook-addon'; -import { mswHandlers } from './msw-handlers'; - -initialize({ - onUnhandledRequest: 'bypass', -}); - -const preview: Preview = { - loaders: [mswLoader], - parameters: { - msw: { - handlers: mswHandlers, - }, - }, -}; - -export default preview; -``` - -```ts -// .storybook/main.ts -import type { StorybookConfig } from '@storybook/react-vite'; - -const config: StorybookConfig = { - staticDirs: ['../public'], -}; - 
-export default config; -``` - -Keep these mocks global. -Do not put fetch mocks in individual stories. -Only add handlers for requests that the shared preview setup or the stories actually use. -Do not add catch-all handlers that can hide unrelated failures. -If the defaults are not enough, improve the shared default setup instead. -Seed browser state when needed, but do not mock `window`, `document`, `navigator`, observers, or similar runtime APIs. -The only exception is `mockdate` when date-based rendering exists. - -## 5. Write stories - -Try to find around 10 good candidate components for story files. -Write colocated stories for top-level components, from low-level reusable components up to page components. -Write up to 10 story files, or fewer only if the codebase clearly has fewer meaningful targets. - -The stories should use JSX copied from real usage patterns in: -- pages -- app shells -- routes -- tests -- existing feature code - -As a rule of thumb, each story file should have around 3 story exports when the component or page has enough meaningful states. -It can have more when the real usage supports it, up to 10 story exports in one file. - -Always show all imports explicitly in story and preview files. -Do not rely on omitted or implied imports in examples or generated code. 
- -For simple components where props drive the state, prefer `args` stories — no `render` function needed: - -```tsx -import type { Meta, StoryObj } from '@storybook/react-vite'; -import { expect } from 'storybook/test'; -import { Button } from './Button'; - -const meta = { - component: Button, -} satisfies Meta; - -export default meta; -type Story = StoryObj; - -export const Primary: Story = { - args: { - variant: 'primary', - children: 'Save', - }, - play: async ({ canvas }) => { - await expect(canvas.getByRole('button', { name: /save/i })).toBeVisible(); - }, -}; - -export const Disabled: Story = { - args: { - variant: 'primary', - disabled: true, - children: 'Save', - }, - play: async ({ canvas }) => { - await expect(canvas.getByRole('button')).toBeDisabled(); - }, -}; -``` - -Use `render` when the story needs composition — wrapping the component in layout, combining multiple components, or passing children as JSX: - -```tsx -import type { Meta, StoryObj } from '@storybook/react-vite'; -import { expect } from 'storybook/test'; -import { Button } from './Button'; -import { Card } from './Card'; - -const meta = { - component: Button, -} satisfies Meta; - -export default meta; -type Story = StoryObj; - -export const InsideCard: Story = { - render: () => ( - - - - ), - play: async ({ canvas, userEvent }) => { - await expect(canvas.getByRole('button', { name: /save/i })).toBeVisible(); - await userEvent.click(canvas.getByRole('button', { name: /save/i })); - }, -}; -``` - -Example of copying real page JSX: - -```tsx -// real app -return ( -
- - {products.map((product) => ( - - ))} -
-); -``` - -```tsx -import type { Meta, StoryObj } from '@storybook/react-vite'; -import { expect } from 'storybook/test'; -import { FiltersPanel } from './FiltersPanel'; -import { ProductCard } from './ProductCard'; -import { mockProduct } from './mockProduct'; - -const meta = { - component: ProductCard, -} satisfies Meta; - -export default meta; -type Story = StoryObj; - -// story -export const Default: Story = { - render: () => ( -
- - -
- ), - play: async ({ canvas }) => { - await expect(canvas.getByText(/example product/i)).toBeVisible(); - }, -}; -``` - -Keep app mocking and runtime setup in preview, not in the stories. -Do not build large story-specific harnesses. -Do not write story files for subcomponents, hooks, contexts, or helpers. -Do not create new application components. -Do not add a custom `title`. -Do not stop after only a few easy targets if the codebase has more meaningful components or pages available. - -## 6. Write a play function for every story - -Every named story export must have a `play` function. -The `play` function is not optional, even for simple stories. - -The purpose of the `play` function is to prove that the story actually works in the copied Storybook environment: -- the story renders something real and non-empty -- the decorators provide the needed context -- the CSS is applied well enough for the intended state to be visible -- the MSW mocks or seeded browser state are actually being used -- important interactions, async loading states, and portals behave correctly - -Use `play` functions to verify behavior, not just to click around. -A story without assertions is incomplete. - -Use tools from `storybook/test` such as: -- `expect` -- `waitFor` - -Prefer `canvas` and `userEvent` from the `play` context. -Do not destructure `canvasElement` just to create `const canvas = within(canvasElement)`. -Do not import `userEvent` from `storybook/test`; use `userEvent` from the `play` context instead. -Only use `canvasElement.ownerDocument` when you need to query outside the canvas, such as for portals. 
-Example: - -```tsx -import type { StoryObj } from '@storybook/react-vite'; - -export const FilledForm: Story = { - play: async ({ canvas, userEvent }) => { - const emailInput = canvas.getByLabelText('email', { - selector: 'input', - }); - - await userEvent.type(emailInput, 'example-email@email.com', { - delay: 100, - }); - - const passwordInput = canvas.getByLabelText('password', { - selector: 'input', - }); - - await userEvent.type(passwordInput, 'ExamplePassword', { - delay: 100, - }); - - const submitButton = canvas.getByRole('button'); - await userEvent.click(submitButton); - }, -}; -``` - -The assertions should match the real pattern you copied: -- for provider-backed stories, assert the provider-dependent UI appears correctly -- for mocked-data stories, wait for the mocked data to appear and assert on it -- for CSS-sensitive states, assert on visibility, text layout, class-driven states, or meaningful computed styles -- for routing or navigation stories, assert the routed state or navigation outcome -- for portal stories, query from `canvasElement.ownerDocument` when the UI renders outside the canvas - -Examples of useful checks: -- a themed button has the expected label and is visibly enabled or disabled -- a modal opened through a decorator or provider is visible in the portal root -- mocked API data appears in the page instead of a loading spinner forever -- a selected tab actually shows the selected panel -- a toast, alert, or badge has the expected accessible text and visual state -- a CSS class or computed style confirms the real state that matters - -## 7. 
Cover the patterns you found - -Write stories for the real patterns in the codebase, for example: -- a low-level reusable component in real JSX usage -- a provider-backed component -- a browser-state-backed component -- a fetched-data component -- a real page component - -Use `App.tsx` to inspect the real provider tree and usage patterns, but do not make a story for `App` when the codebase has actual page components. - -Example page story: - -```tsx -import type { Meta, StoryObj } from '@storybook/react-vite'; -import { expect } from 'storybook/test'; -import { ProductPage } from './ProductPage'; - -const meta = { - component: ProductPage, -} satisfies Meta<typeof ProductPage>; - -export default meta; -type Story = StoryObj<typeof meta>; - -export const Default: Story = { - render: () => <ProductPage />, - play: async ({ canvas }) => { - await expect(canvas.getByRole('heading', { name: /products/i })).toBeVisible(); - }, -}; -``` - -## 8. Verify both rendering and types - -As you work, verify the stories with Vitest: - -```bash -npx vitest --project storybook -``` - -Also verify types so you catch missing required props, broken imports, and preview typing issues. Run the same TypeScript command the project itself uses. - -```bash -<the project's own typecheck command> -``` - -After verification passes, review every changed file and remove anything that is not needed for the final solution, especially debug fixes, overly broad mocks, unnecessary dependencies, and eval artifacts. - -Once all stories pass and types are clean, run a full Storybook build as a final check: - -```bash -npx storybook build -``` - -If the build fails, fix the issue before finishing. Common build failures include missing dependencies, broken imports that only surface during static analysis, or configuration issues in `.storybook/main.ts`. 
- -Keep iterating until: -- every story you wrote passes -- every story you wrote has a meaningful passing `play` function -- the changed stories and preview setup pass the project's real TypeScript check -- `storybook build` succeeds -- the rendered output looks sensible -- the default global mocked environment is strong enough that stories do not need manual fetch overrides -- stories no longer fail because the shared preview setup and story JSX are fixed diff --git a/scripts/eval/prompts/setup.md b/scripts/eval/prompts/setup.md deleted file mode 100644 index 6ee4d37596c4..000000000000 --- a/scripts/eval/prompts/setup.md +++ /dev/null @@ -1,204 +0,0 @@ -Attention: The following instructions must be followed in order to successfully set up Storybook in this project. Do not skip steps or attempt to do them out of order. - -Your goal is to make Storybook fully functional in this project by analyzing the codebase, -configuring the preview with the right decorators, and writing example stories for 9 components. - -Work through these steps in order. After each story file, run Vitest to verify it renders. -If the test fails, read the error, fix the issue, and re-run until it passes before moving on. - -### Step 1: Analyze the codebase - -Before writing any stories, understand what the components need to render: - -- Scan the project for context providers, theme systems, routers, stores, and i18n setups. - Look at the app's entry point (e.g. `App.tsx`, `main.tsx`, `layout.tsx`) to see what - providers wrap the component tree. -- Identify global CSS or style imports required for components to look correct. -- Note any path aliases configured in tsconfig or bundler config. -- Read `.storybook/main.ts` (or `main.js`) to find the `stories` glob patterns. - Your story files must match those patterns to be picked up by Storybook. - -### Step 2: Configure `.storybook/preview.ts` with decorators - -Add decorators that wrap every story with the providers your components need. 
-Without this, most non-trivial components will crash. - -If the project uses CSF Factory (look for `definePreview` in `.storybook/preview.ts`): -```ts -// .storybook/preview.ts -import '../src/index.css'; // import global styles - -import { definePreview } from 'storybook/preview'; - -export const config = definePreview({ - decorators: [ - (Story) => ( - - - - - - ), - ], -}); -``` - -Otherwise: -```ts -// .storybook/preview.ts -import '../src/index.css'; // import global styles - -const preview = { - decorators: [ - (Story) => ( - - - - - - ), - ], -}; -export default preview; -``` - -Common decorators to add: -- **Theme providers** (e.g. ThemeProvider, MUI ThemeProvider, styled-components, Tailwind) -- **Router** (e.g. MemoryRouter, BrowserRouter mock) -- **State stores** (e.g. Redux Provider, Zustand, Jotai) -- **i18n** (e.g. IntlProvider, I18nextProvider) -- **Global CSS** — import global stylesheets at the top of preview.ts - -### Step 3: Write stories for 9 components - -Pick 9 real components from the codebase, 3 of each complexity level. -Use the title prefix `AI Generated//` so they are grouped -together in the Storybook sidebar. - -**Simple (3 components)** — Presentational with few props, no internal state. -Examples: Button, Badge, Avatar, Icon, Label, Chip. -Title format: `AI Generated/Simple/` - -**Medium (3 components)** — Multiple visual variants or composed from simpler components. -Examples: Card, Alert, Input, Select, Tooltip, Tabs. -Title format: `AI Generated/Medium/` - -**Complex (3 components)** — Internal state, side effects, or deep composition. -Examples: Modal, DataTable, Form, Dropdown, Accordion, Sidebar. -Title format: `AI Generated/Complex/` - -For each component, create a `.stories.ts` file next to the component. -Each file must have at least 2 story exports covering the component's main states. -Make sure the file location and naming matches the `stories` patterns in `.storybook/main.ts`. 
- -If the project uses CSF Factory (look for `definePreview` / `config.meta` patterns): - -Story format (CSF Factory — this project uses CSF factories): -```ts -import { config } from '#.storybook/preview'; -import { Button } from './Button'; - -const meta = config.meta({ - title: 'AI Generated/Simple/Button', - component: Button, -}); - -export const Default = meta.story({ - args: { - label: 'Click me', - }, -}); - -export const Disabled = meta.story({ - args: { - label: 'Disabled', - disabled: true, - }, -}); -``` - -Otherwise: - -Story format (CSF): -```ts -import type { Meta, StoryObj } from '@storybook/react'; -import { Button } from './Button'; - -const meta = { - title: 'AI Generated/Simple/Button', - component: Button, -} satisfies Meta; - -export default meta; -type Story = StoryObj; - -export const Default: Story = { - args: { - label: 'Click me', - }, -}; - -export const Disabled: Story = { - args: { - label: 'Disabled', - disabled: true, - }, -}; -``` - -Rules: -- Every named export is a story. Use `args` to set props. -- Provide all required props via `args` — check the component's types. -- If a component needs per-story decorators (beyond the global ones), add them in the meta. -- Do NOT use `any` types. Use the component's prop types for type safety. - -Reference: https://storybook.js.org/docs/latest/writing-stories - -### Step 4: Verify each story with Vitest - -After writing each story file, immediately verify it: - -```bash -npx vitest --project storybook -``` - -**Self-healing loop — repeat for every story file:** -1. Write/update the story file -2. Run `npx vitest --project storybook ` -3. If it fails: read the error output carefully - - Missing provider → add a decorator in `.storybook/preview.ts` or in the story meta - - Missing prop → add the required prop to `args` - - Import error → fix the import path - - CSS/asset error → add static dirs or import the stylesheet -4. Fix the issue and go back to step 2 -5. 
Once the test passes, move to the next component - -After all 9 story files pass individually, run the full suite: -```bash -npx vitest --project storybook -``` - -Once all stories pass, run a full Storybook build as a final check: -```bash -npx storybook build -``` - -If the build fails, fix the issue before finishing. - -### Checklist - -- [ ] Analyzed codebase for providers, global styles, and path aliases -- [ ] Read story patterns from `.storybook/main.ts` -- [ ] Configured `.storybook/preview.ts` with necessary decorators -- [ ] Simple component 1: story written and passing -- [ ] Simple component 2: story written and passing -- [ ] Simple component 3: story written and passing -- [ ] Medium component 1: story written and passing -- [ ] Medium component 2: story written and passing -- [ ] Medium component 3: story written and passing -- [ ] Complex component 1: story written and passing -- [ ] Complex component 2: story written and passing -- [ ] Complex component 3: story written and passing -- [ ] Full Vitest suite passes: `npx vitest --project storybook` -- [ ] `npx storybook build` succeeds -- [ ] Run `npx storybook doctor` to check for common issues (version mismatches, duplicated deps, etc.) diff --git a/scripts/eval/run-batch.ts b/scripts/eval/run-batch.ts index 9ca0a64078d8..98006a73030a 100644 --- a/scripts/eval/run-batch.ts +++ b/scripts/eval/run-batch.ts @@ -86,7 +86,7 @@ export interface RunBatchOptions { repoRoot?: string; evalRoot?: string; batchTimestamp?: string; - /** Required when `descriptors` are not provided — prompt template basename (prompts/{name}.md). */ + /** Required when `descriptors` are not provided — prompt variant name from the CLI registry. */ prompt?: string; /** Skip interactive confirmation (large API / token usage). 
*/ yes?: boolean; @@ -476,7 +476,8 @@ const runBatchOptions = { concurrency: { type: 'string' as const, description: 'Max concurrent runs (default: 8)' }, prompt: { type: 'string' as const, - description: 'Prompt template name (required; file: scripts/eval/prompts/{name}.md)', + description: + 'Prompt variant name (required; registered in code/lib/cli-storybook/src/ai/prompts/)', }, agents: { type: 'string' as const,