Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions apps/web-evals/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"@roo-code/evals": "workspace:^",
"@roo-code/types": "workspace:^",
"@tanstack/react-query": "^5.69.0",
"archiver": "^7.0.1",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"cmdk": "^1.1.0",
Expand All @@ -52,6 +53,7 @@
"@roo-code/config-eslint": "workspace:^",
"@roo-code/config-typescript": "workspace:^",
"@tailwindcss/postcss": "^4",
"@types/archiver": "^7.0.0",
"@types/ps-tree": "^1.1.6",
"@types/react": "^18.3.23",
"@types/react-dom": "^18.3.5",
Expand Down
31 changes: 25 additions & 6 deletions apps/web-evals/src/actions/runs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import { CreateRun } from "@/lib/schemas"

const EVALS_REPO_PATH = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../../../../evals")

export async function createRun({ suite, exercises = [], timeout, ...values }: CreateRun) {
export async function createRun({ suite, exercises = [], timeout, iterations = 1, ...values }: CreateRun) {
const run = await _createRun({
...values,
timeout,
Expand All @@ -36,15 +36,34 @@ export async function createRun({ suite, exercises = [], timeout, ...values }: C
throw new Error("Invalid exercise path: " + path)
}

await createTask({ ...values, runId: run.id, language: language as ExerciseLanguage, exercise })
// Create multiple tasks for each iteration
for (let iteration = 1; iteration <= iterations; iteration++) {
await createTask({
...values,
runId: run.id,
language: language as ExerciseLanguage,
exercise,
iteration,
})
}
}
} else {
for (const language of exerciseLanguages) {
const exercises = await getExercisesForLanguage(EVALS_REPO_PATH, language)
const languageExercises = await getExercisesForLanguage(EVALS_REPO_PATH, language)

// Create tasks for all iterations of each exercise
const tasksToCreate: Array<{ language: ExerciseLanguage; exercise: string; iteration: number }> = []
for (const exercise of languageExercises) {
for (let iteration = 1; iteration <= iterations; iteration++) {
tasksToCreate.push({ language, exercise, iteration })
}
}

await pMap(exercises, (exercise) => createTask({ runId: run.id, language, exercise }), {
concurrency: 10,
})
await pMap(
tasksToCreate,
({ language, exercise, iteration }) => createTask({ runId: run.id, language, exercise, iteration }),
{ concurrency: 10 },
)
}
}

Expand Down
74 changes: 74 additions & 0 deletions apps/web-evals/src/app/api/runs/[id]/logs/[taskId]/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import { NextResponse } from "next/server"
import type { NextRequest } from "next/server"
import * as fs from "node:fs/promises"
import * as path from "node:path"

import { findTask, findRun } from "@roo-code/evals"

export const dynamic = "force-dynamic"

const LOG_BASE_PATH = "/tmp/evals/runs"

/**
 * Makes a single path component safe to embed in a file name.
 *
 * Every path separator (`/`, `\`), colon, NUL byte, and the characters
 * `* ? " < > |` is replaced with an underscore. All other characters,
 * including dots, are passed through unchanged.
 */
function sanitizePathComponent(component: string): string {
	const unsafeChars = new Set(["/", "\\", ":", "\0", "*", "?", '"', "<", ">", "|"])

	let sanitized = ""
	for (let i = 0; i < component.length; i++) {
		const ch = component.charAt(i)
		sanitized += unsafeChars.has(ch) ? "_" : ch
	}

	return sanitized
}

/**
 * Returns the log contents for one task of an eval run.
 *
 * Responses:
 * - 200 `{ logContent }` when the log file was read.
 * - 200 `{ error, logContent: null }` when the log file does not exist on disk.
 * - 400 when an ID is not an integer, or the computed path falls outside the run's log directory.
 * - 404 when the task belongs to a different run, or a lookup throws `RecordNotFoundError`.
 * - 500 for any other failure.
 *
 * @param request - Incoming request (unused; required by the route handler signature).
 * @param params - Route parameters: `id` is the run ID, `taskId` is the task ID.
 */
export async function GET(request: NextRequest, { params }: { params: Promise<{ id: string; taskId: string }> }) {
	const { id, taskId } = await params

	try {
		const runId = Number(id)
		const taskIdNum = Number(taskId)

		// Number() also accepts fractions and "Infinity"; reject anything that is not a safe integer
		// (this subsumes the NaN check) before it reaches the lookups or the file path.
		if (!Number.isSafeInteger(runId) || !Number.isSafeInteger(taskIdNum)) {
			return NextResponse.json({ error: "Invalid run ID or task ID" }, { status: 400 })
		}

		// The result is unused; the call is made so that a missing run surfaces as an
		// exception, which is mapped to a 404 in the catch block below.
		await findRun(runId)

		// Get the task to find its language and exercise
		const task = await findTask(taskIdNum)

		// Verify the task belongs to this run
		if (task.runId !== runId) {
			return NextResponse.json({ error: "Task does not belong to this run" }, { status: 404 })
		}

		// Sanitize language and exercise so neither can introduce a path separator
		const safeLanguage = sanitizePathComponent(task.language)
		const safeExercise = sanitizePathComponent(task.exercise)

		// Construct the log file path inside this run's log directory
		const logFileName = `${safeLanguage}-${safeExercise}.log`
		const runLogDir = path.resolve(LOG_BASE_PATH, String(runId))
		const resolvedPath = path.resolve(runLogDir, logFileName)

		// Defense in depth: the resolved path must sit inside the run directory. The trailing
		// separator matters: a bare prefix test would also accept sibling directories whose
		// names merely start with the same characters.
		if (!resolvedPath.startsWith(runLogDir + path.sep)) {
			return NextResponse.json({ error: "Invalid log path" }, { status: 400 })
		}

		// Read the validated path; a missing file is reported in the payload rather than as a failure
		try {
			const logContent = await fs.readFile(resolvedPath, "utf-8")
			return NextResponse.json({ logContent })
		} catch (err) {
			if ((err as NodeJS.ErrnoException).code === "ENOENT") {
				return NextResponse.json({ error: "Log file not found", logContent: null }, { status: 200 })
			}
			throw err
		}
	} catch (error) {
		console.error("Error reading task log:", error)

		if (error instanceof Error && error.name === "RecordNotFoundError") {
			return NextResponse.json({ error: "Task or run not found" }, { status: 404 })
		}

		return NextResponse.json({ error: "Failed to read log file" }, { status: 500 })
	}
}
129 changes: 129 additions & 0 deletions apps/web-evals/src/app/api/runs/[id]/logs/failed/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import { NextResponse } from "next/server"
import type { NextRequest } from "next/server"
import * as fs from "node:fs"
import * as path from "node:path"
import archiver from "archiver"

import { findRun, getTasks } from "@roo-code/evals"

export const dynamic = "force-dynamic"

const LOG_BASE_PATH = "/tmp/evals/runs"

/**
 * Neutralizes characters that are unsafe inside a single file-name component.
 *
 * Path separators (`/` and `\`), `:`, the NUL byte, and `* ? " < > |` each
 * become `_`; every other character is kept as is.
 */
function sanitizePathComponent(component: string): string {
	const forbidden = '/\\:\0*?"<>|'

	return component
		.split("")
		.map((ch) => (forbidden.includes(ch) ? "_" : ch))
		.join("")
}

/**
 * Exports the log files of every failed task in a run as a single zip download.
 *
 * Responses:
 * - 200 with an `application/zip` body named `run-<id>-failed-logs.zip`.
 * - 400 when the run ID is not an integer, or the run has no failed tasks.
 * - 404 when no log file for a failed task exists on disk, or a lookup throws `RecordNotFoundError`.
 * - 500 for any other failure, including archive errors.
 *
 * @param request - Incoming request (unused; required by the route handler signature).
 * @param params - Route parameters: `id` is the run ID.
 */
export async function GET(request: NextRequest, { params }: { params: Promise<{ id: string }> }) {
	const { id } = await params

	try {
		const runId = Number(id)

		// Number() also accepts fractions and "Infinity"; reject anything that is not a safe integer
		// (this subsumes the NaN check) before it reaches the lookups or the file path.
		if (!Number.isSafeInteger(runId)) {
			return NextResponse.json({ error: "Invalid run ID" }, { status: 400 })
		}

		// The result is unused; the call is made so that a missing run surfaces as an
		// exception, which is mapped to a 404 in the catch block below.
		await findRun(runId)

		// Get all tasks for this run and keep only those explicitly marked as failed
		const tasks = await getTasks(runId)
		const failedTasks = tasks.filter((task) => task.passed === false)

		if (failedTasks.length === 0) {
			return NextResponse.json({ error: "No failed tasks to export" }, { status: 400 })
		}

		// Work out which log files to include BEFORE creating the archive, so the
		// "nothing to export" case never has to set up and abort a zip stream.
		const logDir = path.resolve(LOG_BASE_PATH, String(runId))
		// Zip entry name -> absolute file path. Keyed by name because several failed tasks
		// can map to the same `language-exercise.log` name, and the zip must not contain
		// duplicate entries.
		const logFiles = new Map<string, string>()

		for (const task of failedTasks) {
			// Sanitize language and exercise so neither can introduce a path separator
			const safeLanguage = sanitizePathComponent(task.language)
			const safeExercise = sanitizePathComponent(task.exercise)
			const logFileName = `${safeLanguage}-${safeExercise}.log`

			if (logFiles.has(logFileName)) {
				continue
			}

			// Defense in depth: the resolved path must sit inside the run directory. The trailing
			// separator matters: a bare prefix test would also accept sibling directories whose
			// names merely start with the same characters.
			const logFilePath = path.resolve(logDir, logFileName)
			if (!logFilePath.startsWith(logDir + path.sep)) {
				continue // Skip files with suspicious paths
			}

			try {
				// Non-blocking existence check; a file that cannot be accessed is simply left out.
				await fs.promises.access(logFilePath)
				logFiles.set(logFileName, logFilePath)
			} catch {
				// Log file is not on disk for this task; nothing to add.
			}
		}

		if (logFiles.size === 0) {
			return NextResponse.json(
				{ error: "No log files found - they may have been cleared from disk" },
				{ status: 404 },
			)
		}

		// Create a zip archive and buffer its output in memory
		const archive = archiver("zip", { zlib: { level: 9 } })
		const chunks: Buffer[] = []

		archive.on("data", (chunk: Buffer) => {
			chunks.push(chunk)
		})

		// Register end/error listeners before finalizing so neither event can be missed
		const archiveDone = new Promise<void>((resolve, reject) => {
			archive.on("end", resolve)
			archive.on("error", reject)
		})

		for (const [name, filePath] of logFiles) {
			archive.file(filePath, { name })
		}

		// Await both together: if the archive emits an error while finalize() is still pending,
		// the rejection of `archiveDone` already has a handler attached and is routed to the
		// catch block instead of becoming an unhandled rejection.
		await Promise.all([archiveDone, archive.finalize()])

		// Combine all chunks into a single buffer
		const zipBuffer = Buffer.concat(chunks)

		// Return the zip file
		return new NextResponse(zipBuffer, {
			status: 200,
			headers: {
				"Content-Type": "application/zip",
				"Content-Disposition": `attachment; filename="run-${runId}-failed-logs.zip"`,
				"Content-Length": String(zipBuffer.length),
			},
		})
	} catch (error) {
		console.error("Error exporting failed logs:", error)

		if (error instanceof Error && error.name === "RecordNotFoundError") {
			return NextResponse.json({ error: "Run not found" }, { status: 404 })
		}

		return NextResponse.json({ error: "Failed to export logs" }, { status: 500 })
	}
}
Loading
Loading