-
Notifications
You must be signed in to change notification settings - Fork 2.6k
feat(web-evals): add task log viewing, export failed logs, and new run options #9637
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
6c7ec4d
feat(web-evals): add task log viewing, export failed logs, and new run options
hannesrudolph 0374d39
fix(web-evals): add missing API routes for task log viewing and faile…
hannesrudolph 861ad71
fix: address PR review findings
hannesrudolph ed51c0e
feat(web-evals): add iterations support and database migration
hannesrudolph 382d25f
Update apps/web-evals/src/app/runs/[id]/run.tsx
hannesrudolph 13016ee
fix(web-evals): fix JSX syntax error and use safe React elements for …
hannesrudolph File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
74 changes: 74 additions & 0 deletions
74
apps/web-evals/src/app/api/runs/[id]/logs/[taskId]/route.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,74 @@ | ||
| import { NextResponse } from "next/server" | ||
| import type { NextRequest } from "next/server" | ||
| import * as fs from "node:fs/promises" | ||
| import * as path from "node:path" | ||
|
|
||
| import { findTask, findRun } from "@roo-code/evals" | ||
|
|
||
// Route segment config export; NOTE(review): per the Next.js docs "force-dynamic"
// opts this route out of static rendering/caching so every request re-reads the log.
export const dynamic = "force-dynamic"

// Root directory for eval logs. The handler below reads
// `<LOG_BASE_PATH>/<runId>/<language>-<exercise>.log`.
const LOG_BASE_PATH = "/tmp/evals/runs"

// Path separators (both flavours), the drive/stream separator, NUL, and shell/Windows
// wildcard and redirection characters. Hoisted so the pattern is compiled once.
const UNSAFE_PATH_CHARS = /[/\\:\0*?"<>|]/g

/**
 * Makes a string safe to use as a single path segment.
 *
 * Every unsafe character is replaced with `_` (characters are replaced, not removed,
 * so the result has the same length as the input). A result consisting solely of `.`
 * or `..` is also rewritten, because those segments navigate the directory tree even
 * though they contain no separator.
 *
 * @param component - Untrusted text destined for a file or directory name.
 * @returns The sanitized segment; ordinary names such as `two-fer` or `v1.2` are unchanged.
 */
function sanitizePathComponent(component: string): string {
	const cleaned = component.replace(UNSAFE_PATH_CHARS, "_")

	// "." and ".." are the only separator-free segments that still change directory.
	return cleaned === "." || cleaned === ".." ? cleaned.replace(/\./g, "_") : cleaned
}
|
|
||
/**
 * GET handler that returns the log of a single task of a run as JSON.
 *
 * Responses:
 * - 200 `{ logContent }` when the log file was read.
 * - 200 `{ error: "Log file not found", logContent: null }` when the file is absent on disk.
 * - 400 when either id is not a plain non-negative integer, or the computed path
 *   falls outside the run's log directory.
 * - 404 when the task belongs to a different run, or a lookup fails with an error
 *   named `RecordNotFoundError` (presumably thrown by `findRun` / `findTask`).
 * - 500 for any other failure.
 *
 * @param request - Incoming request (unused; required by the route handler signature).
 * @param params - Promise of the dynamic route segments `id` (run) and `taskId`.
 */
export async function GET(request: NextRequest, { params }: { params: Promise<{ id: string; taskId: string }> }) {
	const { id, taskId } = await params

	try {
		// Number() alone would accept "", " ", "1.5", "0x10", "1e3" and "Infinity",
		// so require plain decimal digits before converting.
		const isPlainInteger = (value: string): boolean => /^\d+$/.test(value)
		const runId = Number(id)
		const taskIdNum = Number(taskId)

		if (
			!isPlainInteger(id) ||
			!isPlainInteger(taskId) ||
			!Number.isSafeInteger(runId) ||
			!Number.isSafeInteger(taskIdNum)
		) {
			return NextResponse.json({ error: "Invalid run ID or task ID" }, { status: 400 })
		}

		// Verify the run exists
		await findRun(runId)

		// Get the task to find its language and exercise
		const task = await findTask(taskIdNum)

		// Verify the task belongs to this run
		if (task.runId !== runId) {
			return NextResponse.json({ error: "Task does not belong to this run" }, { status: 404 })
		}

		// Sanitize language and exercise so neither can introduce a path separator
		const safeLanguage = sanitizePathComponent(task.language)
		const safeExercise = sanitizePathComponent(task.exercise)

		// Construct the log file path inside this run's directory
		const logFileName = `${safeLanguage}-${safeExercise}.log`
		const runLogDir = path.resolve(LOG_BASE_PATH, String(runId))
		const logFilePath = path.resolve(runLogDir, logFileName)

		// Defense in depth: the file must sit inside the run directory. The trailing
		// separator matters — a bare prefix test would also accept sibling directories
		// whose names merely start with the same characters.
		if (!logFilePath.startsWith(runLogDir + path.sep)) {
			return NextResponse.json({ error: "Invalid log path" }, { status: 400 })
		}

		// Read the validated path; a missing file is reported in-band with status 200
		try {
			const logContent = await fs.readFile(logFilePath, "utf-8")
			return NextResponse.json({ logContent })
		} catch (err) {
			if ((err as NodeJS.ErrnoException).code === "ENOENT") {
				return NextResponse.json({ error: "Log file not found", logContent: null }, { status: 200 })
			}
			throw err
		}
	} catch (error) {
		console.error("Error reading task log:", error)

		if (error instanceof Error && error.name === "RecordNotFoundError") {
			return NextResponse.json({ error: "Task or run not found" }, { status: 404 })
		}

		return NextResponse.json({ error: "Failed to read log file" }, { status: 500 })
	}
}
129 changes: 129 additions & 0 deletions
129
apps/web-evals/src/app/api/runs/[id]/logs/failed/route.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,129 @@ | ||
| import { NextResponse } from "next/server" | ||
| import type { NextRequest } from "next/server" | ||
| import * as fs from "node:fs" | ||
| import * as path from "node:path" | ||
| import archiver from "archiver" | ||
|
|
||
| import { findRun, getTasks } from "@roo-code/evals" | ||
|
|
||
// Route segment config export; NOTE(review): per the Next.js docs "force-dynamic"
// opts this route out of static rendering/caching so every request rebuilds the archive.
export const dynamic = "force-dynamic"

// Root directory for eval logs. The handler below collects
// `<LOG_BASE_PATH>/<runId>/<language>-<exercise>.log` files.
const LOG_BASE_PATH = "/tmp/evals/runs"

// Path separators (both flavours), the drive/stream separator, NUL, and shell/Windows
// wildcard and redirection characters. Hoisted so the pattern is compiled once.
const UNSAFE_PATH_CHARS = /[/\\:\0*?"<>|]/g

/**
 * Makes a string safe to use as a single path segment (and as a zip entry name).
 *
 * Every unsafe character is replaced with `_` (characters are replaced, not removed,
 * so the result has the same length as the input). A result consisting solely of `.`
 * or `..` is also rewritten, because those segments navigate the directory tree even
 * though they contain no separator.
 *
 * @param component - Untrusted text destined for a file or directory name.
 * @returns The sanitized segment; ordinary names such as `two-fer` or `v1.2` are unchanged.
 */
function sanitizePathComponent(component: string): string {
	const cleaned = component.replace(UNSAFE_PATH_CHARS, "_")

	// "." and ".." are the only separator-free segments that still change directory.
	return cleaned === "." || cleaned === ".." ? cleaned.replace(/\./g, "_") : cleaned
}
|
|
||
/**
 * GET handler that bundles the log files of every failed task of a run into a zip
 * archive and returns it as a download.
 *
 * Responses:
 * - 200 `application/zip` attachment named `run-<id>-failed-logs.zip`.
 * - 400 when the run id is not a plain non-negative integer, or the run has no failed tasks.
 * - 404 when none of the failed tasks has a log file on disk, or a lookup fails with
 *   an error named `RecordNotFoundError` (presumably thrown by `findRun`).
 * - 500 for any other failure, including archive errors.
 *
 * The archive is buffered fully in memory before being sent so that `Content-Length`
 * can be set.
 *
 * @param request - Incoming request (unused; required by the route handler signature).
 * @param params - Promise of the dynamic route segment `id` (run).
 */
export async function GET(request: NextRequest, { params }: { params: Promise<{ id: string }> }) {
	const { id } = await params

	try {
		// Number() alone would accept "", " ", "1.5", "0x10", "1e3" and "Infinity",
		// so require plain decimal digits before converting.
		const runId = Number(id)

		if (!/^\d+$/.test(id) || !Number.isSafeInteger(runId)) {
			return NextResponse.json({ error: "Invalid run ID" }, { status: 400 })
		}

		// Verify the run exists
		await findRun(runId)

		// Get all tasks for this run and keep only the failed ones
		const tasks = await getTasks(runId)
		const failedTasks = tasks.filter((task) => task.passed === false)

		if (failedTasks.length === 0) {
			return NextResponse.json({ error: "No failed tasks to export" }, { status: 400 })
		}

		// Resolve which log files exist BEFORE creating the archive, so the "nothing to
		// export" path never leaves a half-built archive or a pending promise behind.
		const logDir = path.resolve(LOG_BASE_PATH, String(runId))
		const logDirPrefix = logDir + path.sep
		const entries = new Map<string, string>() // zip entry name -> absolute file path

		for (const task of failedTasks) {
			// Sanitize language and exercise so neither can introduce a path separator
			const safeLanguage = sanitizePathComponent(task.language)
			const safeExercise = sanitizePathComponent(task.exercise)
			const logFileName = `${safeLanguage}-${safeExercise}.log`

			// Several failed tasks can map to the same file name; adding it twice would
			// create duplicate zip entries.
			if (entries.has(logFileName)) {
				continue
			}

			// Defense in depth: the file must sit inside the run directory. The trailing
			// separator matters — a bare prefix test would also accept sibling directories.
			const logFilePath = path.resolve(logDir, logFileName)
			if (!logFilePath.startsWith(logDirPrefix)) {
				continue // Skip files with suspicious paths
			}

			// Non-blocking existence check (existsSync would stall the event loop)
			try {
				await fs.promises.access(logFilePath, fs.constants.F_OK)
			} catch {
				continue // Log is not on disk (e.g. already cleaned up)
			}

			entries.set(logFileName, logFilePath)
		}

		// Check if any files were actually found
		if (entries.size === 0) {
			return NextResponse.json(
				{ error: "No log files found - they may have been cleared from disk" },
				{ status: 404 },
			)
		}

		// Create a zip archive and collect its output chunks
		const archive = archiver("zip", { zlib: { level: 9 } })
		const chunks: Buffer[] = []

		archive.on("data", (chunk: Buffer) => {
			chunks.push(chunk)
		})

		// Register end/error listeners before finalizing so neither event can be missed
		const archiveEndPromise = new Promise<void>((resolve, reject) => {
			archive.on("end", () => resolve())
			archive.on("error", reject)
		})

		for (const [entryName, filePath] of entries) {
			archive.file(filePath, { name: entryName })
		}

		// Await both promises together: if the archive errors, both reject, and awaiting
		// them one after the other would leave the second as an unhandled rejection.
		await Promise.all([archiveEndPromise, archive.finalize()])

		// Combine all chunks into a single buffer
		const zipBuffer = Buffer.concat(chunks)

		// Return the zip file
		return new NextResponse(zipBuffer, {
			status: 200,
			headers: {
				"Content-Type": "application/zip",
				"Content-Disposition": `attachment; filename="run-${runId}-failed-logs.zip"`,
				"Content-Length": String(zipBuffer.length),
			},
		})
	} catch (error) {
		console.error("Error exporting failed logs:", error)

		if (error instanceof Error && error.name === "RecordNotFoundError") {
			return NextResponse.json({ error: "Run not found" }, { status: 404 })
		}

		return NextResponse.json({ error: "Failed to export logs" }, { status: 500 })
	}
}
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.