Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions eval/lib/agents/claude-code-cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,7 @@ function getTodoProgress(
messages: ClaudeCodeStreamMessage[],
): TodoProgress | null {
// Find the most recent TodoWrite message
for (let i = messages.length - 1; i >= 0; i--) {
const message = messages[i];
for (const message of messages.toReversed()) {
if (message.type === 'assistant') {
const todoWrite = message.message.content.find(
(c): c is ToolUseContent =>
Expand Down
2 changes: 1 addition & 1 deletion eval/lib/collect-args.ts
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ export async function collectArgs() {
const config: McpServerConfig = {
[mcpServerName]: {
type: 'stdio',
command,
command: command!,
args: argsParts.length > 0 ? argsParts : undefined,
},
};
Expand Down
2 changes: 1 addition & 1 deletion eval/lib/evaluations/build.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ export async function build({
projectPath,
resultsPath,
}: ExperimentArgs): Promise<boolean> {
const result = await x('pnpm', ['build'], {
const result = await x('pnpm', ['eval:build'], {
nodeOptions: {
cwd: projectPath,
},
Expand Down
30 changes: 22 additions & 8 deletions eval/lib/evaluations/prepare-evaluations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ export async function prepareEvaluations({
}: ExperimentArgs) {
await addDevDependency(
[
'vitest@catalog:',
'@vitest/browser-playwright@catalog:',
'storybook@catalog:',
'@storybook/addon-docs@catalog:',
'@storybook/addon-a11y@catalog:',
'@storybook/addon-vitest@catalog:',
'@storybook/react-vite@catalog:',
'eslint-plugin-storybook@catalog:',
'vitest@catalog:experiments',
'@vitest/browser-playwright@catalog:experiments',
'storybook@catalog:experiments',
'@storybook/addon-docs@catalog:experiments',
'@storybook/addon-a11y@catalog:experiments',
'@storybook/addon-vitest@catalog:experiments',
'@storybook/react-vite@catalog:experiments',
'eslint-plugin-storybook@catalog:experiments',
],
{ cwd: projectPath, silent: true },
);
Expand All @@ -28,6 +28,20 @@ export async function prepareEvaluations({
filter: (source) =>
!source.includes('node_modules') && !source.includes('dist'),
});

const { default: pkgJson } = await import(
path.join(projectPath, 'package.json'),
{
with: { type: 'json' },
}
);
// add the storybook script after agent execution, so it does not taint the experiment
pkgJson.scripts.storybook = 'storybook dev --port 6006';
await fs.writeFile(
path.join(projectPath, 'package.json'),
JSON.stringify(pkgJson, null, 2),
);

await fs
.cp(
path.join(evalPath, 'expected', 'stories'),
Expand Down
33 changes: 25 additions & 8 deletions eval/lib/evaluations/test-stories.ts
Original file line number Diff line number Diff line change
@@ -1,24 +1,41 @@
import { startVitest } from 'vitest/node';
import * as path from 'node:path';
import * as fs from 'node:fs/promises';
import type { EvaluationSummary, ExperimentArgs } from '../../types';
import type { JsonTestResults } from 'vitest/reporters';
import { x } from 'tinyexec';
import { dedent } from 'ts-dedent';

export async function testStories({
projectPath,
resultsPath,
}: ExperimentArgs): Promise<Pick<EvaluationSummary, 'test' | 'a11y'>> {
const testResultsPath = path.join(resultsPath, 'tests.json');

const vitest = await startVitest('test', undefined, {
root: projectPath,
watch: false,
silent: true,
reporters: ['json'],
outputFile: testResultsPath,
const result = await x('pnpm', ['eval:test'], {
nodeOptions: {
cwd: projectPath,
},
});

await vitest.close();
await fs.writeFile(
path.join(resultsPath, 'tests.md'),
dedent`# Test Results

**Exit Code:** ${result.exitCode}

## stdout

\`\`\`sh
${result.stdout}
\`\`\`

## stderr

\`\`\`
${result.stderr}
\`\`\`
`,
);

const { default: jsonTestResults } = (await import(testResultsPath, {
with: { type: 'json' },
Expand Down
4 changes: 2 additions & 2 deletions eval/lib/evaluations/typecheck.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
// Read tsconfig.json
const configFile = ts.readConfigFile(
path.join(projectPath, 'tsconfig.app.json'),
ts.sys.readFile,

Check warning on line 33 in eval/lib/evaluations/typecheck.ts

View workflow job for this annotation

GitHub Actions / Lint

typescript-eslint(unbound-method)

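Avoid referencing unbound methods which may cause unintentional scoping of `this`. If your function does not access `this`, you can annotate it with `this: void`, or consider using an arrow function instead.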
);
const parsedConfig = ts.parseJsonConfigFileContent(
configFile.config,
Expand Down Expand Up @@ -109,8 +109,8 @@
}
console.log({
typeErrors: await checkTypes({
projectPath: path.join(experimentPath[0], 'project'),
resultsPath: path.join(experimentPath[0], 'results'),
projectPath: path.join(experimentPath[0]!, 'project'),
resultsPath: path.join(experimentPath[0]!, 'results'),
} as ExperimentArgs),
});
}
2 changes: 1 addition & 1 deletion eval/lib/save/chromatic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ export async function buildStorybook(
let buildError = '';

try {
await runScript('build-storybook', {
await runScript('eval:build-storybook', {
cwd: projectPath,
silent: true,
});
Expand Down
5 changes: 3 additions & 2 deletions eval/lib/save/google-sheet.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,9 @@ function getContextDetails(context: Context): string {
const mcpConfig = Object.values(context.mcpServerConfig)[0];
if (mcpConfig?.type === 'stdio' && mcpConfig.args) {
const manifestIndex = mcpConfig.args.indexOf('--manifestPath');
if (manifestIndex !== -1 && mcpConfig.args[manifestIndex + 1]) {
return path.basename(mcpConfig.args[manifestIndex + 1]);
const manifestIndexValue = mcpConfig.args[manifestIndex + 1];
if (manifestIndex !== -1 && manifestIndexValue) {
return path.basename(manifestIndexValue);
}
}
return 'unknown manifest name';
Expand Down
7 changes: 4 additions & 3 deletions eval/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@
"description": "The project for evaluating UI component development with and without Storybook MCP",
"type": "module",
"scripts": {
"build-storybook": "storybook build",
"eval": "node eval.ts",
"storybook": "storybook dev -p 6006"
"storybook": "storybook dev -p 6006",
"typecheck": "tsc"
},
"devDependencies": {
"@anthropic-ai/claude-agent-sdk": "^0.1.30",
"@clack/prompts": "1.0.0-alpha.6",
"@radix-ui/colors": "^3.0.0",
"chromatic": "^13.3.3",
"@radix-ui/react-popover": "^1.1.15",
"@radix-ui/react-toggle": "^1.1.10",
"@radix-ui/react-toggle-group": "^1.1.11",
Expand All @@ -24,9 +23,11 @@
"@tsconfig/node24": "^24.0.1",
"@types/envinfo": "^7.8.4",
"@types/eslint": "^9.6.1",
"@types/node": "^24.10.1",
"@types/react": "^18.3.26",
"@vitejs/plugin-react-swc": "^4.2.0",
"ai-tokenizer": "^1.0.3",
"chromatic": "^13.3.3",
"envinfo": "^7.20.0",
"eslint": "^9.36.0",
"globals": "^16.4.0",
Expand Down
2 changes: 1 addition & 1 deletion eval/templates/evaluation/eslint.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ export default defineConfig([
extends: [
js.configs.recommended,
tseslint.configs.recommendedTypeChecked,
reactHooks.configs['recommended-latest'],
reactHooks.configs.flat['recommended-latest'],
reactRefresh.configs.vite,
],
languageOptions: {
Expand Down
41 changes: 20 additions & 21 deletions eval/templates/project/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,30 @@
"private": true,
"type": "module",
"scripts": {
"build": "vite build",
"build-storybook": "storybook build",
"dev": "vite",
"lint": "eslint .",
"eval:build": "vite build",
"eval:build-storybook": "storybook build",
"eval:dev": "vite",
"eval:lint": "eslint .",
"preview": "vite preview",
"test": "vitest",
"storybook": "storybook dev --port 6006",
"typecheck": "tsc --noEmit --project ./tsconfig.app.json"
"eval:test": "vitest run --reporter json --outputFile ../results/tests.json",
"eval:typecheck": "tsc --noEmit --project ./tsconfig.app.json"
},
"dependencies": {
"react": "^19.1.1",
"react-dom": "^19.1.1"
"react": "catalog:experiments",
"react-dom": "catalog:experiments"
},
"devDependencies": {
"@eslint/js": "^9.36.0",
"@types/node": "^24.6.0",
"@types/react": "^19.1.16",
"@types/react-dom": "^19.1.9",
"@vitejs/plugin-react-swc": "^4.1.0",
"eslint": "^9.36.0",
"eslint-plugin-react-hooks": "^5.2.0",
"eslint-plugin-react-refresh": "^0.4.22",
"globals": "^16.4.0",
"typescript": "~5.9.3",
"typescript-eslint": "^8.45.0",
"vite": "^7.1.12"
"@eslint/js": "catalog:experiments",
"@types/node": "catalog:experiments",
"@types/react": "catalog:experiments",
"@types/react-dom": "catalog:experiments",
"@vitejs/plugin-react-swc": "catalog:experiments",
"eslint": "catalog:experiments",
"eslint-plugin-react-hooks": "catalog:experiments",
"eslint-plugin-react-refresh": "catalog:experiments",
"globals": "catalog:experiments",
"typescript": "catalog:experiments",
"typescript-eslint": "catalog:experiments",
"vite": "catalog:experiments"
}
}
29 changes: 14 additions & 15 deletions eval/templates/result-docs/conversation.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,8 @@ const CodeBlock = ({
const codeRef = useRef<HTMLElement>(null);

useEffect(() => {
if (codeRef.current && (window as any).hljs) {
(window as any).hljs.highlightElement(codeRef.current);
if (codeRef.current && (globalThis as any).hljs) {
(globalThis as any).hljs.highlightElement(codeRef.current);
}
}, [content, isTruncated]);

Expand Down Expand Up @@ -410,6 +410,15 @@ const ElapsedTime = ({
</div>
);

/**
 * Colour pairs for each kind of conversation turn.
 *
 * Each entry maps a turn type to a `bg` and a `text` hex colour. The object is
 * declared `as const`, so its keys form a closed literal union; the `Turn`
 * component types its `type` prop as `keyof typeof TYPE_COLORS` and looks up
 * its colours here, falling back to the `assistant` entry.
 */
const TYPE_COLORS = {
assistant: { bg: '#dbeafe', text: '#1e40af' },
user: { bg: '#f3e8ff', text: '#6b21a8' },
system: { bg: '#e0e7ff', text: '#3730a3' },
result: { bg: '#dcfce7', text: '#166534' },
tool: { bg: '#fef3c7', text: '#92400e' },
prompt: { bg: '#fce7f3', text: '#9f1239' },
} as const;

const Turn = ({
children,
type,
Expand All @@ -420,7 +429,7 @@ const Turn = ({
isMCP = false,
}: {
children: React.ReactNode;
type: string;
type: keyof typeof TYPE_COLORS;
title: string;
subtitle?: string;
tokenCount?: string;
Expand All @@ -429,16 +438,7 @@ const Turn = ({
}) => {
const [isExpanded, setIsExpanded] = useState(false);

const typeColors: Record<string, { bg: string; text: string }> = {
assistant: { bg: '#dbeafe', text: '#1e40af' },
user: { bg: '#f3e8ff', text: '#6b21a8' },
system: { bg: '#e0e7ff', text: '#3730a3' },
result: { bg: '#dcfce7', text: '#166534' },
tool: { bg: '#fef3c7', text: '#92400e' },
prompt: { bg: '#fce7f3', text: '#9f1239' },
};

const colors = typeColors[type] || typeColors.assistant;
const colors = TYPE_COLORS[type] ?? TYPE_COLORS.assistant;

return (
<div
Expand Down Expand Up @@ -771,10 +771,9 @@ function groupToolCallsWithResults(turns: ConversationMessage[]): Array<{
}> = [];
const usedResultIndices = new Set<number>();

for (let i = 0; i < turns.length; i++) {
for (const [i, turn] of turns.entries()) {
if (usedResultIndices.has(i)) continue;

const turn = turns[i];
const toolUseContent =
turn.type === 'assistant' &&
'message' in turn &&
Expand Down
15 changes: 13 additions & 2 deletions eval/tsconfig.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
{
"extends": "../tsconfig.json",
"compilerOptions": {
"jsx": "react-jsx"
}
"jsx": "react-jsx",
"lib": [
"es2024",
"ESNext.Array",
"ESNext.Collection",
"ESNext.Iterator",
"ESNext.Promise",
"DOM",
"DOM.AsyncIterable",
"DOM.Iterable"
]
},
"include": ["eval", "lib", "templates/result-docs", "google-apps-script.js"]
}
7 changes: 4 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,18 @@
"@codecov/rollup-plugin": "^1.9.1",
"@modelcontextprotocol/inspector": "^0.17.2",
"@types/node": "20.19.0",
"@vitest/coverage-v8": "^4.0.8",
"@vitest/coverage-v8": "4.0.6",
"oxlint": "^1.25.0",
"oxlint-tsgolint": "^0.4.0",
"pkg-pr-new": "^0.0.57",
"prettier": "^3.6.2",
"publint": "^0.3.15",
"tsdown": "^0.16.4",
"turbo": "^2.6.1",
"vite-tsconfig-paths": "^5.1.4",
"typescript": "~5.9.3",
"vite": "catalog:",
"vitest": "^4.0.8"
"vite-tsconfig-paths": "^5.1.4",
"vitest": "4.0.6"
},
"packageManager": "pnpm@10.19.0+sha512.c9fc7236e92adf5c8af42fd5bf1612df99c2ceb62f27047032f4720b33f8eacdde311865e91c411f2774f618d82f320808ecb51718bfa82c060c4ba7c76a32b8"
}
5 changes: 1 addition & 4 deletions packages/addon-mcp/src/mcp-handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,7 @@ export const mcpServerHandler = async ({
componentCount: Object.keys(manifest.components).length,
});
},
onGetComponentDocumentation: async ({
input,
foundComponent,
}) => {
onGetComponentDocumentation: async ({ input, foundComponent }) => {
await collectTelemetry({
event: 'tool:getComponentDocumentation',
server,
Expand Down
Loading
Loading