Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions apps/web-evals/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"@roo-code/evals": "workspace:^",
"@roo-code/types": "workspace:^",
"@tanstack/react-query": "^5.69.0",
"archiver": "^7.0.1",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"cmdk": "^1.1.0",
Expand All @@ -52,6 +53,7 @@
"@roo-code/config-eslint": "workspace:^",
"@roo-code/config-typescript": "workspace:^",
"@tailwindcss/postcss": "^4",
"@types/archiver": "^7.0.0",
"@types/ps-tree": "^1.1.6",
"@types/react": "^18.3.23",
"@types/react-dom": "^18.3.5",
Expand Down
31 changes: 25 additions & 6 deletions apps/web-evals/src/actions/runs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import { CreateRun } from "@/lib/schemas"

const EVALS_REPO_PATH = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../../../../evals")

export async function createRun({ suite, exercises = [], timeout, ...values }: CreateRun) {
export async function createRun({ suite, exercises = [], timeout, iterations = 1, ...values }: CreateRun) {
const run = await _createRun({
...values,
timeout,
Expand All @@ -36,15 +36,34 @@ export async function createRun({ suite, exercises = [], timeout, ...values }: C
throw new Error("Invalid exercise path: " + path)
}

await createTask({ ...values, runId: run.id, language: language as ExerciseLanguage, exercise })
// Create multiple tasks for each iteration
for (let iteration = 1; iteration <= iterations; iteration++) {
await createTask({
...values,
runId: run.id,
language: language as ExerciseLanguage,
exercise,
iteration,
})
}
}
} else {
for (const language of exerciseLanguages) {
const exercises = await getExercisesForLanguage(EVALS_REPO_PATH, language)
const languageExercises = await getExercisesForLanguage(EVALS_REPO_PATH, language)

// Create tasks for all iterations of each exercise
const tasksToCreate: Array<{ language: ExerciseLanguage; exercise: string; iteration: number }> = []
for (const exercise of languageExercises) {
for (let iteration = 1; iteration <= iterations; iteration++) {
tasksToCreate.push({ language, exercise, iteration })
}
}

await pMap(exercises, (exercise) => createTask({ runId: run.id, language, exercise }), {
concurrency: 10,
})
await pMap(
tasksToCreate,
({ language, exercise, iteration }) => createTask({ runId: run.id, language, exercise, iteration }),
{ concurrency: 10 },
)
}
}

Expand Down
74 changes: 74 additions & 0 deletions apps/web-evals/src/app/api/runs/[id]/logs/[taskId]/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import { NextResponse } from "next/server"
import type { NextRequest } from "next/server"
import * as fs from "node:fs/promises"
import * as path from "node:path"

import { findTask, findRun } from "@roo-code/evals"

// Next.js route segment option. NOTE(review): "force-dynamic" is understood to make the route
// run on every request instead of being cached — confirm against the installed Next.js version.
export const dynamic = "force-dynamic"

// Root directory for eval run logs. The GET handler in this file reads
// <LOG_BASE_PATH>/<runId>/<language>-<exercise>.log.
const LOG_BASE_PATH = "/tmp/evals/runs"

/**
 * Neutralizes a single path component so it cannot be used for path traversal.
 *
 * Every occurrence of a path separator, NUL byte, or one of the characters
 * `: * ? " < > |` is replaced with an underscore; all other characters are
 * passed through unchanged.
 *
 * @param component - Raw value that will be embedded in a file name.
 * @returns The component with each unsafe character replaced by `_`.
 */
function sanitizePathComponent(component: string): string {
	const unsafeChars = new Set(["/", "\\", ":", "\0", "*", "?", '"', "<", ">", "|"])

	let cleaned = ""
	for (const ch of component) {
		cleaned += unsafeChars.has(ch) ? "_" : ch
	}

	return cleaned
}

/**
 * GET handler that returns the log file contents for a single task of a run.
 *
 * Responses:
 * - 200 `{ logContent }` when the log file was read.
 * - 200 `{ error, logContent: null }` when the log file does not exist on disk.
 * - 400 when either ID is not a non-negative integer, or the computed log path
 *   falls outside the run's log directory.
 * - 404 when the task belongs to a different run, or a lookup throws an error
 *   named `RecordNotFoundError`.
 * - 500 for any other failure.
 *
 * @param request - Incoming request (unused; required by the route signature).
 * @param params - Route params promise carrying the `id` (run) and `taskId` segments.
 */
export async function GET(request: NextRequest, { params }: { params: Promise<{ id: string; taskId: string }> }) {
	const { id, taskId } = await params

	try {
		const runId = Number(id)
		const taskIdNum = Number(taskId)

		// Number() also accepts fractions, Infinity and negatives, which isNaN() lets through;
		// require whole, non-negative values before hitting the lookups.
		const isValidId = (value: number): boolean => Number.isSafeInteger(value) && value >= 0

		if (!isValidId(runId) || !isValidId(taskIdNum)) {
			return NextResponse.json({ error: "Invalid run ID or task ID" }, { status: 400 })
		}

		// Verify the run exists (the result itself is not needed)
		await findRun(runId)

		// Get the task to find its language and exercise
		const task = await findTask(taskIdNum)

		// Verify the task belongs to this run
		if (task.runId !== runId) {
			return NextResponse.json({ error: "Task does not belong to this run" }, { status: 404 })
		}

		// Sanitize language and exercise to prevent path traversal
		const safeLanguage = sanitizePathComponent(task.language)
		const safeExercise = sanitizePathComponent(task.exercise)

		// Construct the log file path
		const logFileName = `${safeLanguage}-${safeExercise}.log`
		const runLogDir = path.resolve(LOG_BASE_PATH, String(runId))
		const resolvedPath = path.resolve(runLogDir, logFileName)

		// Defense in depth: the file must live inside this run's directory. The trailing
		// separator prevents a sibling such as "<dir>-other" from passing a bare prefix match.
		if (!resolvedPath.startsWith(runLogDir + path.sep)) {
			return NextResponse.json({ error: "Invalid log path" }, { status: 400 })
		}

		// Read the exact path that was verified above
		try {
			const logContent = await fs.readFile(resolvedPath, "utf-8")
			return NextResponse.json({ logContent })
		} catch (err) {
			// A missing log is reported in the body with status 200 rather than as an HTTP error
			if ((err as NodeJS.ErrnoException).code === "ENOENT") {
				return NextResponse.json({ error: "Log file not found", logContent: null }, { status: 200 })
			}
			throw err
		}
	} catch (error) {
		console.error("Error reading task log:", error)

		if (error instanceof Error && error.name === "RecordNotFoundError") {
			return NextResponse.json({ error: "Task or run not found" }, { status: 404 })
		}

		return NextResponse.json({ error: "Failed to read log file" }, { status: 500 })
	}
}
129 changes: 129 additions & 0 deletions apps/web-evals/src/app/api/runs/[id]/logs/failed/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import { NextResponse } from "next/server"
import type { NextRequest } from "next/server"
import * as fs from "node:fs"
import * as path from "node:path"
import archiver from "archiver"

import { findRun, getTasks } from "@roo-code/evals"

// Next.js route segment option. NOTE(review): "force-dynamic" is understood to make the route
// run on every request instead of being cached — confirm against the installed Next.js version.
export const dynamic = "force-dynamic"

// Root directory for eval run logs. The GET handler in this file collects
// <LOG_BASE_PATH>/<runId>/<language>-<exercise>.log files for failed tasks.
const LOG_BASE_PATH = "/tmp/evals/runs"

/**
 * Makes a value safe to embed in a file name by replacing characters that
 * could alter the path: `/`, `\`, `:`, NUL, `*`, `?`, `"`, `<`, `>` and `|`.
 *
 * @param component - Raw path component.
 * @returns The same text with each listed character swapped for `_`.
 */
function sanitizePathComponent(component: string): string {
	const forbidden = '/\\:\0*?"<>|'
	const replaced = Array.from(component, (ch) => (forbidden.includes(ch) ? "_" : ch))
	return replaced.join("")
}

/**
 * GET handler that bundles the log files of a run's failed tasks into a zip download.
 *
 * Responses:
 * - 200 `application/zip` attachment named `run-<id>-failed-logs.zip`.
 * - 400 when the run ID is not a non-negative integer, or the run has no failed tasks.
 * - 404 when none of the expected log files exist on disk, or a lookup throws an
 *   error named `RecordNotFoundError`.
 * - 500 for any other failure (including archive errors).
 *
 * @param request - Incoming request (unused; required by the route signature).
 * @param params - Route params promise carrying the `id` (run) segment.
 */
export async function GET(request: NextRequest, { params }: { params: Promise<{ id: string }> }) {
	const { id } = await params

	try {
		const runId = Number(id)

		// Number() also accepts fractions, Infinity and negatives, which isNaN() lets through.
		if (!Number.isSafeInteger(runId) || runId < 0) {
			return NextResponse.json({ error: "Invalid run ID" }, { status: 400 })
		}

		// Verify the run exists (the result itself is not needed)
		await findRun(runId)

		// Get all tasks for this run and keep only the ones explicitly marked as failed
		const tasks = await getTasks(runId)
		const failedTasks = tasks.filter((task) => task.passed === false)

		if (failedTasks.length === 0) {
			return NextResponse.json({ error: "No failed tasks to export" }, { status: 400 })
		}

		const logDir = path.resolve(LOG_BASE_PATH, String(runId))

		// Keyed by archive entry name: several failed tasks can map to the same
		// "<language>-<exercise>.log" file, and it must only be archived once.
		const candidates = new Map<string, string>()

		for (const task of failedTasks) {
			// Sanitize language and exercise to prevent path traversal
			const safeLanguage = sanitizePathComponent(task.language)
			const safeExercise = sanitizePathComponent(task.exercise)
			const logFileName = `${safeLanguage}-${safeExercise}.log`
			const resolvedPath = path.resolve(logDir, logFileName)

			// Defense in depth: skip anything that does not resolve inside this run's directory.
			// The trailing separator stops a sibling such as "<dir>-other" from matching.
			if (!resolvedPath.startsWith(logDir + path.sep)) {
				continue
			}

			candidates.set(logFileName, resolvedPath)
		}

		// Probe for the files without blocking the event loop
		const probed = await Promise.all(
			Array.from(candidates, async ([name, filePath]) => {
				try {
					await fs.promises.access(filePath)
					return { name, filePath }
				} catch {
					return null
				}
			}),
		)
		const logFiles = probed.filter((entry): entry is { name: string; filePath: string } => entry !== null)

		// Nothing on disk: answer before an archive is ever created
		if (logFiles.length === 0) {
			return NextResponse.json(
				{ error: "No log files found - they may have been cleared from disk" },
				{ status: 404 },
			)
		}

		// Create a zip archive and buffer its output in memory
		const archive = archiver("zip", { zlib: { level: 9 } })
		const chunks: Buffer[] = []

		archive.on("data", (chunk: Buffer) => {
			chunks.push(chunk)
		})

		// Register end/error listeners before finalizing so neither event can be missed
		const archiveDone = new Promise<void>((resolve, reject) => {
			archive.on("end", () => resolve())
			archive.on("error", reject)
		})

		for (const { name, filePath } of logFiles) {
			archive.file(filePath, { name })
		}

		// Await both together: if either rejects, the other rejection is still observed,
		// so an archive error cannot surface as an unhandled promise rejection.
		await Promise.all([archiveDone, archive.finalize()])

		// Combine all chunks into a single buffer
		const zipBuffer = Buffer.concat(chunks)

		// Return the zip file
		return new NextResponse(zipBuffer, {
			status: 200,
			headers: {
				"Content-Type": "application/zip",
				"Content-Disposition": `attachment; filename="run-${runId}-failed-logs.zip"`,
				"Content-Length": String(zipBuffer.length),
			},
		})
	} catch (error) {
		console.error("Error exporting failed logs:", error)

		if (error instanceof Error && error.name === "RecordNotFoundError") {
			return NextResponse.json({ error: "Run not found" }, { status: 404 })
		}

		return NextResponse.json({ error: "Failed to export logs" }, { status: 500 })
	}
}
Loading
Loading