Skip to content

Commit 3f0a697

Browse files
feat(web-evals): add task log viewing, export failed logs, and new run options (#9637)
Co-authored-by: roomote[bot] <219738659+roomote[bot]@users.noreply.github.com>
1 parent 5a6dd58 commit 3f0a697

File tree

15 files changed

+1383
-38
lines changed

15 files changed

+1383
-38
lines changed

apps/web-evals/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
"@roo-code/evals": "workspace:^",
3030
"@roo-code/types": "workspace:^",
3131
"@tanstack/react-query": "^5.69.0",
32+
"archiver": "^7.0.1",
3233
"class-variance-authority": "^0.7.1",
3334
"clsx": "^2.1.1",
3435
"cmdk": "^1.1.0",
@@ -52,6 +53,7 @@
5253
"@roo-code/config-eslint": "workspace:^",
5354
"@roo-code/config-typescript": "workspace:^",
5455
"@tailwindcss/postcss": "^4",
56+
"@types/archiver": "^7.0.0",
5557
"@types/ps-tree": "^1.1.6",
5658
"@types/react": "^18.3.23",
5759
"@types/react-dom": "^18.3.5",

apps/web-evals/src/actions/runs.ts

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ import { CreateRun } from "@/lib/schemas"
2121

2222
const EVALS_REPO_PATH = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../../../../evals")
2323

24-
export async function createRun({ suite, exercises = [], timeout, ...values }: CreateRun) {
24+
export async function createRun({ suite, exercises = [], timeout, iterations = 1, ...values }: CreateRun) {
2525
const run = await _createRun({
2626
...values,
2727
timeout,
@@ -36,15 +36,34 @@ export async function createRun({ suite, exercises = [], timeout, ...values }: C
3636
throw new Error("Invalid exercise path: " + path)
3737
}
3838

39-
await createTask({ ...values, runId: run.id, language: language as ExerciseLanguage, exercise })
39+
// Create multiple tasks for each iteration
40+
for (let iteration = 1; iteration <= iterations; iteration++) {
41+
await createTask({
42+
...values,
43+
runId: run.id,
44+
language: language as ExerciseLanguage,
45+
exercise,
46+
iteration,
47+
})
48+
}
4049
}
4150
} else {
4251
for (const language of exerciseLanguages) {
43-
const exercises = await getExercisesForLanguage(EVALS_REPO_PATH, language)
52+
const languageExercises = await getExercisesForLanguage(EVALS_REPO_PATH, language)
53+
54+
// Create tasks for all iterations of each exercise
55+
const tasksToCreate: Array<{ language: ExerciseLanguage; exercise: string; iteration: number }> = []
56+
for (const exercise of languageExercises) {
57+
for (let iteration = 1; iteration <= iterations; iteration++) {
58+
tasksToCreate.push({ language, exercise, iteration })
59+
}
60+
}
4461

45-
await pMap(exercises, (exercise) => createTask({ runId: run.id, language, exercise }), {
46-
concurrency: 10,
47-
})
62+
await pMap(
63+
tasksToCreate,
64+
({ language, exercise, iteration }) => createTask({ runId: run.id, language, exercise, iteration }),
65+
{ concurrency: 10 },
66+
)
4867
}
4968
}
5069

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import { NextResponse } from "next/server"
2+
import type { NextRequest } from "next/server"
3+
import * as fs from "node:fs/promises"
4+
import * as path from "node:path"
5+
6+
import { findTask, findRun } from "@roo-code/evals"
7+
8+
export const dynamic = "force-dynamic"
9+
10+
const LOG_BASE_PATH = "/tmp/evals/runs"
11+
12+
/**
 * Makes a single path component safe to embed in a file name.
 *
 * Every path separator (`/`, `\`), colon, NUL byte, and wildcard/reserved
 * character (`*`, `?`, `"`, `<`, `>`, `|`) is replaced with an underscore, so
 * the result can never introduce an additional directory level.
 *
 * @param component - Raw value to embed in a file name.
 * @returns The component with each unsafe character replaced by `_`.
 */
function sanitizePathComponent(component: string): string {
	const unsafeCharacters = /[/\\:\0*?"<>|]/g
	const sanitized = component.replace(unsafeCharacters, "_")
	return sanitized
}
17+
18+
export async function GET(request: NextRequest, { params }: { params: Promise<{ id: string; taskId: string }> }) {
19+
const { id, taskId } = await params
20+
21+
try {
22+
const runId = Number(id)
23+
const taskIdNum = Number(taskId)
24+
25+
if (isNaN(runId) || isNaN(taskIdNum)) {
26+
return NextResponse.json({ error: "Invalid run ID or task ID" }, { status: 400 })
27+
}
28+
29+
// Verify the run exists
30+
await findRun(runId)
31+
32+
// Get the task to find its language and exercise
33+
const task = await findTask(taskIdNum)
34+
35+
// Verify the task belongs to this run
36+
if (task.runId !== runId) {
37+
return NextResponse.json({ error: "Task does not belong to this run" }, { status: 404 })
38+
}
39+
40+
// Sanitize language and exercise to prevent path traversal
41+
const safeLanguage = sanitizePathComponent(task.language)
42+
const safeExercise = sanitizePathComponent(task.exercise)
43+
44+
// Construct the log file path
45+
const logFileName = `${safeLanguage}-${safeExercise}.log`
46+
const logFilePath = path.join(LOG_BASE_PATH, String(runId), logFileName)
47+
48+
// Verify the resolved path is within the expected directory (defense in depth)
49+
const resolvedPath = path.resolve(logFilePath)
50+
const expectedBase = path.resolve(LOG_BASE_PATH)
51+
if (!resolvedPath.startsWith(expectedBase)) {
52+
return NextResponse.json({ error: "Invalid log path" }, { status: 400 })
53+
}
54+
55+
// Check if the log file exists and read it (async)
56+
try {
57+
const logContent = await fs.readFile(logFilePath, "utf-8")
58+
return NextResponse.json({ logContent })
59+
} catch (err) {
60+
if ((err as NodeJS.ErrnoException).code === "ENOENT") {
61+
return NextResponse.json({ error: "Log file not found", logContent: null }, { status: 200 })
62+
}
63+
throw err
64+
}
65+
} catch (error) {
66+
console.error("Error reading task log:", error)
67+
68+
if (error instanceof Error && error.name === "RecordNotFoundError") {
69+
return NextResponse.json({ error: "Task or run not found" }, { status: 404 })
70+
}
71+
72+
return NextResponse.json({ error: "Failed to read log file" }, { status: 500 })
73+
}
74+
}
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
import { NextResponse } from "next/server"
2+
import type { NextRequest } from "next/server"
3+
import * as fs from "node:fs"
4+
import * as path from "node:path"
5+
import archiver from "archiver"
6+
7+
import { findRun, getTasks } from "@roo-code/evals"
8+
9+
export const dynamic = "force-dynamic"
10+
11+
const LOG_BASE_PATH = "/tmp/evals/runs"
12+
13+
/**
 * Converts an arbitrary string into a value that is safe to use as part of a
 * file name.
 *
 * Path separators (`/`, `\`), colons, NUL bytes, and the characters
 * `*`, `?`, `"`, `<`, `>`, `|` are each replaced with an underscore, so the
 * result cannot introduce a new directory level.
 *
 * @param component - Raw value to embed in a file name.
 * @returns The component with each unsafe character replaced by `_`.
 */
function sanitizePathComponent(component: string): string {
	const unsafeCharacters = /[/\\:\0*?"<>|]/g
	const sanitized = component.replace(unsafeCharacters, "_")
	return sanitized
}
18+
19+
/**
 * Streams the log files of every failed task in a run into a zip archive and
 * returns it as a download.
 *
 * Responses:
 * - 200 with an `application/zip` body named `run-<id>-failed-logs.zip`.
 * - 400 when the run ID is not an integer or the run has no failed tasks.
 * - 404 when the run lookup throws `RecordNotFoundError`, or when none of the
 *   failed tasks has a log file on disk.
 * - 500 for any other failure (including archive errors).
 *
 * @param request - Incoming request (unused; required by the route signature).
 * @param context - Route context whose `params` resolve to the run `id`.
 */
export async function GET(request: NextRequest, { params }: { params: Promise<{ id: string }> }) {
	const { id } = await params

	try {
		const runId = Number(id)

		// Reject NaN as well as fractional/infinite values, which can never be
		// valid record IDs and would otherwise surface as a 500 from the lookup.
		if (!Number.isSafeInteger(runId)) {
			return NextResponse.json({ error: "Invalid run ID" }, { status: 400 })
		}

		// Verify the run exists (throws when it does not).
		await findRun(runId)

		// Only tasks explicitly marked as failed are exported.
		const tasks = await getTasks(runId)
		const failedTasks = tasks.filter((task) => task.passed === false)

		if (failedTasks.length === 0) {
			return NextResponse.json({ error: "No failed tasks to export" }, { status: 400 })
		}

		// Resolve the set of log files BEFORE creating the archive, so nothing has
		// to be aborted when there is nothing to export. Keyed by archive entry
		// name: several failed tasks can map to the same "<language>-<exercise>.log"
		// file, and adding it twice would create duplicate zip entries.
		const expectedBase = path.resolve(LOG_BASE_PATH)
		const logDir = path.resolve(expectedBase, String(runId))
		const logFiles = new Map<string, string>()

		for (const task of failedTasks) {
			// Sanitize language and exercise so they cannot add directory levels.
			const safeLanguage = sanitizePathComponent(task.language)
			const safeExercise = sanitizePathComponent(task.exercise)
			const logFileName = `${safeLanguage}-${safeExercise}.log`

			if (logFiles.has(logFileName)) {
				continue
			}

			// Defense in depth: the resolved path must sit strictly inside the base
			// directory. The trailing separator matters: a bare prefix comparison
			// would also accept sibling directories such as "<base>-other".
			const logFilePath = path.resolve(logDir, logFileName)
			if (!logFilePath.startsWith(expectedBase + path.sep)) {
				continue // Skip files with suspicious paths
			}

			try {
				// Non-blocking existence check; a missing log is simply skipped.
				await fs.promises.access(logFilePath)
				logFiles.set(logFileName, logFilePath)
			} catch {
				// Log file is not on disk (e.g. it was cleared) - leave it out.
			}
		}

		if (logFiles.size === 0) {
			return NextResponse.json(
				{ error: "No log files found - they may have been cleared from disk" },
				{ status: 404 },
			)
		}

		// Create a zip archive and buffer its output in memory.
		const archive = archiver("zip", { zlib: { level: 9 } })
		const chunks: Buffer[] = []

		archive.on("data", (chunk: Buffer) => {
			chunks.push(chunk)
		})

		// Register the completion listeners before finalizing so no event is missed.
		const archiveEndPromise = new Promise<void>((resolve, reject) => {
			archive.on("end", () => resolve())
			archive.on("error", reject)
		})

		for (const [name, filePath] of logFiles) {
			archive.file(filePath, { name })
		}

		// Await both together: if finalize() and the "error" event both reject,
		// Promise.all observes each rejection, so none is left unhandled.
		await Promise.all([archiveEndPromise, archive.finalize()])

		// Combine all chunks into a single buffer
		const zipBuffer = Buffer.concat(chunks)

		// Return the zip file
		return new NextResponse(zipBuffer, {
			status: 200,
			headers: {
				"Content-Type": "application/zip",
				"Content-Disposition": `attachment; filename="run-${runId}-failed-logs.zip"`,
				"Content-Length": String(zipBuffer.length),
			},
		})
	} catch (error) {
		console.error("Error exporting failed logs:", error)

		if (error instanceof Error && error.name === "RecordNotFoundError") {
			return NextResponse.json({ error: "Run not found" }, { status: 404 })
		}

		return NextResponse.json({ error: "Failed to export logs" }, { status: 500 })
	}
}

0 commit comments

Comments
 (0)