Skip to content

Commit 8433eaf

Browse files
feat(evals-ui): Add filtering, bulk delete, tool consolidation, and run notes (#9837)
1 parent ae655c5 commit 8433eaf

File tree

6 files changed

+970
-95
lines changed

6 files changed

+970
-95
lines changed

apps/web-evals/src/actions/runs.ts

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,19 @@ import {
1313
exerciseLanguages,
1414
createRun as _createRun,
1515
deleteRun as _deleteRun,
16+
updateRun as _updateRun,
17+
getIncompleteRuns as _getIncompleteRuns,
18+
deleteRunsByIds as _deleteRunsByIds,
1619
createTask,
1720
getExercisesForLanguage,
1821
} from "@roo-code/evals"
1922

2023
import { CreateRun } from "@/lib/schemas"
2124
import { redisClient } from "@/lib/server/redis"
2225

26+
// Root directory for eval logs; each run's folder lives directly beneath it,
// named after the run id.
const EVALS_STORAGE_PATH = "/tmp/evals/runs"

// Location of the `evals` directory, resolved five levels above this module's directory.
const EVALS_REPO_PATH = path.resolve(
	path.dirname(fileURLToPath(import.meta.url)),
	"../../../../../evals",
)
2430

2531
export async function createRun({ suite, exercises = [], timeout, iterations = 1, ...values }: CreateRun) {
@@ -214,3 +220,150 @@ export async function killRun(runId: number): Promise<KillRunResult> {
214220
errors,
215221
}
216222
}
223+
224+
/**
 * Outcome reported by the bulk run-deletion actions in this module.
 */
export type DeleteIncompleteRunsResult = {
	/** Completion flag; the bulk-delete actions set it to `true` on every result they return. */
	success: boolean
	/** Number of runs that were removed. */
	deletedCount: number
	/** Ids of the runs that were removed. */
	deletedRunIds: number[]
	/** One message per run whose storage folder could not be deleted. */
	storageErrors: string[]
}
230+
231+
/**
 * Best-effort removal of the on-disk log folder and the Redis bookkeeping keys
 * (`heartbeat:<id>` and `runners:<id>`) for each of the given runs.
 *
 * A failure for one run is logged and never prevents the remaining runs from
 * being processed; this function does not throw for storage or Redis errors.
 *
 * @param runIds - Ids of the runs whose artifacts should be removed.
 * @returns One message per run whose storage folder could not be deleted.
 */
async function cleanUpRunArtifacts(runIds: readonly number[]): Promise<string[]> {
	const storageErrors: string[] = []

	// Acquire the Redis client once rather than once per run. If it is
	// unavailable, Redis cleanup is skipped (it is non-critical) but storage
	// cleanup still proceeds.
	let redis: Awaited<ReturnType<typeof redisClient>> | undefined

	try {
		redis = await redisClient()
	} catch (error) {
		console.error("Failed to acquire Redis client; skipping Redis cleanup:", error)
	}

	for (const runId of runIds) {
		const storagePath = path.join(EVALS_STORAGE_PATH, String(runId))

		try {
			// Non-blocking removal. `force` is deliberately omitted so that a
			// missing folder rejects with ENOENT, which lets us distinguish
			// "nothing to delete" from "deleted" without a separate exists check.
			await fs.promises.rm(storagePath, { recursive: true })
			console.log(`Deleted storage folder: ${storagePath}`)
		} catch (error) {
			const isMissing = error instanceof Error && "code" in error && error.code === "ENOENT"

			if (!isMissing) {
				console.error(`Failed to delete storage folder ${storagePath}:`, error)
				storageErrors.push(`Failed to delete storage for run ${runId}`)
			}
		}

		if (redis) {
			try {
				await redis.del(`heartbeat:${runId}`)
				await redis.del(`runners:${runId}`)
			} catch (error) {
				// Non-critical error, just log it
				console.error(`Failed to clear Redis state for run ${runId}:`, error)
			}
		}
	}

	return storageErrors
}

/**
 * Delete the given runs from the database, then clean up their storage folders
 * and Redis keys, and revalidate the `/runs` page.
 *
 * The database rows are removed first: if that step throws, no logs have been
 * destroyed yet and the records stay consistent with what is on disk. Storage
 * problems afterwards are reported through `storageErrors` instead of throwing.
 *
 * @param runIds - Ids of the runs to delete; an empty list is a no-op.
 * @returns Summary of what was deleted.
 * @throws Whatever the database deletion throws.
 */
async function deleteRunsWithArtifacts(runIds: number[]): Promise<DeleteIncompleteRunsResult> {
	if (runIds.length === 0) {
		return {
			success: true,
			deletedCount: 0,
			deletedRunIds: [],
			storageErrors: [],
		}
	}

	await _deleteRunsByIds(runIds)

	const storageErrors = await cleanUpRunArtifacts(runIds)

	revalidatePath("/runs")

	return {
		success: true,
		deletedCount: runIds.length,
		deletedRunIds: runIds,
		storageErrors,
	}
}

/**
 * Delete all incomplete runs (runs without a taskMetricsId/final score).
 * Removes both database records and storage folders.
 *
 * @returns Summary of the deletion; `storageErrors` lists runs whose folder could not be removed.
 * @throws Whatever the incomplete-run lookup or the database deletion throws.
 */
export async function deleteIncompleteRuns(): Promise<DeleteIncompleteRunsResult> {
	const incompleteRuns = await _getIncompleteRuns()

	return deleteRunsWithArtifacts(incompleteRuns.map((run) => run.id))
}

/**
 * Get count of incomplete runs (for UI display)
 *
 * @returns Number of runs returned by the incomplete-run lookup.
 */
export async function getIncompleteRunsCount(): Promise<number> {
	const incompleteRuns = await _getIncompleteRuns()

	return incompleteRuns.length
}

/**
 * Delete all runs older than 30 days.
 * Removes both database records and storage folders.
 *
 * @returns Summary of the deletion; `storageErrors` lists runs whose folder could not be removed.
 * @throws Whatever the run lookup or the database deletion throws.
 */
export async function deleteOldRuns(): Promise<DeleteIncompleteRunsResult> {
	const maxAgeMs = 30 * 24 * 60 * 60 * 1000
	const cutoff = new Date(Date.now() - maxAgeMs)

	// NOTE(review): "@roo-code/evals" is already imported statically at the top of
	// this file, so `getRuns` could simply join that import list.
	const { getRuns } = await import("@roo-code/evals")
	const allRuns = await getRuns()

	// Age is measured from `createdAt`; every run is loaded and filtered in memory.
	const oldRunIds = allRuns.filter((run) => run.createdAt < cutoff).map((run) => run.id)

	return deleteRunsWithArtifacts(oldRunIds)
}
355+
356+
/**
 * Store a new description for a run and refresh the pages that display it.
 *
 * Errors are logged and reported through the returned flag rather than thrown.
 *
 * @param runId - Id of the run to update.
 * @param description - New description; `null` is forwarded to the update as-is.
 * @returns `{ success: true }` once the update and both revalidations finish, otherwise `{ success: false }`.
 */
export async function updateRunDescription(runId: number, description: string | null): Promise<{ success: boolean }> {
	let success = false

	try {
		await _updateRun(runId, { description })

		// Refresh the run list first, then this run's detail page.
		for (const route of ["/runs", `/runs/${runId}`]) {
			revalidatePath(route)
		}

		success = true
	} catch (error) {
		console.error("Failed to update run description:", error)
	}

	return { success }
}

0 commit comments

Comments
 (0)