diff --git a/apps/web-roo-code/src/app/evals/evals.tsx b/apps/web-roo-code/src/app/evals/evals.tsx index 26591719dbcc..294d702f8401 100644 --- a/apps/web-roo-code/src/app/evals/evals.tsx +++ b/apps/web-roo-code/src/app/evals/evals.tsx @@ -1,59 +1,33 @@ "use client" import { useMemo } from "react" -import { ScatterChart, Scatter, XAxis, YAxis, Label, Customized, Cross } from "recharts" - -import type { TaskMetrics, Run } from "@roo-code/evals" import { formatTokens, formatCurrency, formatDuration, formatScore } from "@/lib" import { useOpenRouterModels } from "@/lib/hooks" -import { - ChartContainer, - ChartTooltip, - ChartTooltipContent, - ChartConfig, - ChartLegend, - ChartLegendContent, - Table, - TableBody, - TableCaption, - TableCell, - TableHead, - TableHeader, - TableRow, -} from "@/components/ui" +import { Table, TableBody, TableCaption, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui" + +import type { EvalRun } from "./types" +import { Plot } from "./plot" -export function Evals({ - runs, -}: { - runs: (Run & { - label: string - score: number - languageScores?: Record<"go" | "java" | "javascript" | "python" | "rust", number> - taskMetrics: TaskMetrics - modelId?: string - })[] -}) { +export function Evals({ runs }: { runs: EvalRun[] }) { const { data: openRouterModels } = useOpenRouterModels() - const tableData = useMemo( + const tableData: (EvalRun & { label: string; cost: number })[] = useMemo( () => - runs.map((run) => ({ - ...run, - label: run.description || run.model, - score: run.score, - cost: run.taskMetrics.cost, - model: openRouterModels?.[run.modelId ?? ""], - modelInfo: openRouterModels?.[run.modelId ?? ""]?.modelInfo, - })), - [runs, openRouterModels], - ) + runs.map((run) => { + const openRouterModelInfo = openRouterModels?.[run.modelId ?? ""]?.modelInfo - const chartData = useMemo(() => tableData.filter(({ cost }) => cost < 100), [tableData]) - - const chartConfig = useMemo( - () => chartData.reduce((acc, run) => ({ ...acc, [run.label]: run }), {} as ChartConfig), - [chartData], + return { + ...run, + label: run.name || run.description || run.model, + cost: run.taskMetrics.cost, + description: run.description ?? openRouterModelInfo?.description ?? null, + contextWindow: run.contextWindow ?? openRouterModelInfo?.contextWindow ?? null, + inputPrice: run.inputPrice ?? openRouterModelInfo?.inputPrice ?? null, + outputPrice: run.outputPrice ?? openRouterModelInfo?.outputPrice ?? null, + } + }), + [runs, openRouterModels], ) return ( @@ -127,15 +101,15 @@ export function Evals({ {tableData.map((run) => ( - +
{run.label}
-
{formatTokens(run.modelInfo?.contextWindow)}
+
{formatTokens(run.contextWindow)}
-
{formatCurrency(run.modelInfo?.inputPrice)}
+
{formatCurrency(run.inputPrice)}
/
-
{formatCurrency(run.modelInfo?.outputPrice)}
+
{formatCurrency(run.outputPrice)}
{formatDuration(run.taskMetrics.duration)} @@ -167,58 +141,9 @@ export function Evals({ ))}
-
Cost Versus Score
- - - Math.round((dataMin - 5) / 5) * 5, - (dataMax: number) => Math.round((dataMax + 5) / 5) * 5, - ]} - tickFormatter={(value) => formatCurrency(value)}> - - Math.max(0, Math.round((dataMin - 5) / 5) * 5), - (dataMax: number) => Math.min(100, Math.round((dataMax + 5) / 5) * 5), - ]} - tickFormatter={(value) => `${value}%`}> - - } /> - - {chartData.map((d, i) => ( - - ))} - } /> - - -
- (Note: Very expensive models are excluded from the scatter plot.) -
+
) } - -// eslint-disable-next-line @typescript-eslint/no-explicit-any -const renderQuadrant = (props: any) => ( - -) diff --git a/apps/web-roo-code/src/app/evals/plot.tsx b/apps/web-roo-code/src/app/evals/plot.tsx new file mode 100644 index 000000000000..86c1be3a9a34 --- /dev/null +++ b/apps/web-roo-code/src/app/evals/plot.tsx @@ -0,0 +1,336 @@ +"use client" + +import { useMemo } from "react" +import { ScatterChart, Scatter, XAxis, YAxis, Customized, Cross, LabelList } from "recharts" + +import { formatCurrency } from "@/lib" +import { ChartContainer, ChartTooltip, ChartConfig } from "@/components/ui" + +import type { EvalRun } from "./types" + +type PlotProps = { + tableData: (EvalRun & { label: string; cost: number })[] +} + +type LabelPosition = "top" | "bottom" | "left" | "right" + +export const Plot = ({ tableData }: PlotProps) => { + const chartData = useMemo(() => tableData.filter(({ cost }) => cost < 50), [tableData]) + + const chartConfig = useMemo( + () => chartData.reduce((acc, run) => ({ ...acc, [run.label]: run }), {} as ChartConfig), + [chartData], + ) + + // Calculate label positions to avoid overlaps. + const labelPositions = useMemo(() => { + const positions: Record = {} + + // Track placed labels with their approximate bounds. + const placedLabels: Array<{ + cost: number + score: number + label: string + position: LabelPosition + }> = [] + + // Helper function to check if two labels would overlap. + const wouldLabelsOverlap = ( + p1: { cost: number; score: number; position: LabelPosition }, + p2: { cost: number; score: number; position: LabelPosition }, + ): boolean => { + // Approximate thresholds for overlap detection. + const horizontalThreshold = 4 // Cost units. + const verticalThreshold = 5 // Score units. + + const costDiff = Math.abs(p1.cost - p2.cost) + const scoreDiff = Math.abs(p1.score - p2.score) + + // If points are far apart, no overlap. + if (costDiff > horizontalThreshold * 2 || scoreDiff > verticalThreshold * 2) { + return false + } + + // Check specific position combinations for overlap. + // Same position for nearby points definitely overlaps. + if (p1.position === p2.position && costDiff < horizontalThreshold && scoreDiff < verticalThreshold) { + return true + } + + // Check adjacent position overlaps. + const p1IsTop = p1.position === "top" + const p1IsBottom = p1.position === "bottom" + const p2IsTop = p2.position === "top" + const p2IsBottom = p2.position === "bottom" + + // If both labels are on the same vertical side and points are close + // horizontally. + if ((p1IsTop && p2IsTop) || (p1IsBottom && p2IsBottom)) { + if (costDiff < horizontalThreshold && scoreDiff < verticalThreshold / 2) { + return true + } + } + + return false + } + + // Helper function to check if position would overlap with a data point. + const wouldOverlapPoint = (point: (typeof chartData)[0], position: LabelPosition): boolean => { + for (const other of chartData) { + if (other.label === point.label) { + continue + } + + const costDiff = Math.abs(point.cost - other.cost) + const scoreDiff = Math.abs(point.score - other.score) + + // Check if label would be placed on top of another point. + switch (position) { + case "top": + // Label is above, check if there's a point above. + if (costDiff < 3 && other.score > point.score && other.score - point.score < 6) { + return true + } + break + case "bottom": + // Label is below, check if there's a point below. + if (costDiff < 3 && other.score < point.score && point.score - other.score < 6) { + return true + } + break + case "left": + // Label is to the left, check if there's a point to the left. + if (scoreDiff < 3 && other.cost < point.cost && point.cost - other.cost < 4) { + return true + } + break + case "right": + // Label is to the right, check if there's a point to the right. + if (scoreDiff < 3 && other.cost > point.cost && other.cost - point.cost < 4) { + return true + } + break + } + } + return false + } + + // Sort points to process them in a consistent order. + // Process from top-left to bottom-right. + const sortedData = [...chartData].sort((a, b) => { + // First by score (higher first). + const scoreDiff = b.score - a.score + if (Math.abs(scoreDiff) > 1) return scoreDiff + // Then by cost (lower first). + return a.cost - b.cost + }) + + // Process each point and find the best position. + sortedData.forEach((point) => { + // Try positions in order of preference. + const positionPreferences: LabelPosition[] = ["top", "bottom", "right", "left"] + + let bestPosition: LabelPosition = "top" + + for (const position of positionPreferences) { + // Check if this position would overlap with any placed labels. + let hasLabelOverlap = false + + for (const placed of placedLabels) { + if ( + wouldLabelsOverlap( + { cost: point.cost, score: point.score, position }, + { cost: placed.cost, score: placed.score, position: placed.position }, + ) + ) { + hasLabelOverlap = true + break + } + } + + // Check if this position would overlap with any data points. + const hasPointOverlap = wouldOverlapPoint(point, position) + + // If no overlaps, use this position. + if (!hasLabelOverlap && !hasPointOverlap) { + bestPosition = position + break + } + } + + // Use the best position found + positions[point.label] = bestPosition + placedLabels.push({ + cost: point.cost, + score: point.score, + label: point.label, + position: bestPosition, + }) + }) + + return positions + }, [chartData]) + + return ( + <> +
Cost x Score
+ + + Math.round((dataMin - 5) / 5) * 5, + (dataMax: number) => Math.round((dataMax + 5) / 5) * 5, + ]} + tickFormatter={(value) => formatCurrency(value)} + /> + Math.max(0, Math.round((dataMin - 5) / 5) * 5), + (dataMax: number) => Math.min(100, Math.round((dataMax + 5) / 5) * 5), + ]} + tickFormatter={(value) => `${value}%`} + /> + { + if (!active || !payload || !payload.length || !payload[0]) { + return null + } + + const { label, cost, score } = payload[0].payload + + return ( +
+
{label}
+
+
+ Score: {Math.round(score)}% +
+
+ Cost: {formatCurrency(cost)} +
+
+
+ ) + }} + /> + + {chartData.map((d, index) => ( + + renderCustomLabel(props, labelPositions[d.label] || "top")} + /> + + ))} +
+
+
+ (Note: Models with a cost of $50 or more are excluded from the scatter plot.) +
+ + ) +} + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +const renderQuadrant = (props: any) => ( + +) + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +const renderCustomLabel = (props: any, position: LabelPosition) => { + const { x, y, value } = props + const maxWidth = 80 // Maximum width in pixels - adjust as needed. + + const truncateText = (text: string, maxChars: number = 20) => { + if (text.length <= maxChars) { + return text + } + + return text.substring(0, maxChars - 1) + "…" + } + + // Calculate position offsets based on label position. + let xOffset = 0 + let yOffset = 0 + let textAnchor: "middle" | "start" | "end" = "middle" + let dominantBaseline: "auto" | "hanging" | "middle" = "auto" + + switch (position) { + case "top": + yOffset = -8 + textAnchor = "middle" + dominantBaseline = "auto" + break + case "bottom": + yOffset = 15 + textAnchor = "middle" + dominantBaseline = "hanging" + break + case "left": + xOffset = -8 + yOffset = 5 + textAnchor = "end" + dominantBaseline = "middle" + break + case "right": + xOffset = 15 + yOffset = 5 + textAnchor = "start" + dominantBaseline = "middle" + break + } + + return ( + + {truncateText(value)} + + ) +} + +const generateSpectrumColor = (index: number, total: number): string => { + // Distribute hues evenly across the color wheel (0-360 degrees). + // Start at 0 (red) and distribute evenly. + const hue = (index * 360) / total + + // Use high saturation for vibrant colors. + const saturation = 70 + + // Use medium lightness for good visibility on both light and dark backgrounds. + const lightness = 50 + + return `hsl(${Math.round(hue)}, ${saturation}%, ${lightness}%)` +} diff --git a/apps/web-roo-code/src/app/evals/types.ts b/apps/web-roo-code/src/app/evals/types.ts new file mode 100644 index 000000000000..c28049661dc1 --- /dev/null +++ b/apps/web-roo-code/src/app/evals/types.ts @@ -0,0 +1,9 @@ +import type { TaskMetrics, Run } from "@roo-code/evals" + +export type EvalRun = Run & { + label: string + score: number + languageScores?: Record<"go" | "java" | "javascript" | "python" | "rust", number> + taskMetrics: TaskMetrics + modelId?: string +} diff --git a/apps/web-roo-code/src/lib/format-currency.ts b/apps/web-roo-code/src/lib/format-currency.ts index e9ea9f25ef8e..820ebe2588f3 100644 --- a/apps/web-roo-code/src/lib/format-currency.ts +++ b/apps/web-roo-code/src/lib/format-currency.ts @@ -7,6 +7,7 @@ export const formatCurrency = (amount: number | null | undefined) => { if (amount === null || amount === undefined) { return "-" } + return formatter.format(amount) } diff --git a/apps/web-roo-code/src/lib/hooks/use-open-router-models.ts b/apps/web-roo-code/src/lib/hooks/use-open-router-models.ts index 2988421ae53b..4b5ffbc9c328 100644 --- a/apps/web-roo-code/src/lib/hooks/use-open-router-models.ts +++ b/apps/web-roo-code/src/lib/hooks/use-open-router-models.ts @@ -49,7 +49,7 @@ export const getOpenRouterModels = async (): Promise => { return result.data.data .filter((rawModel) => { - // Skip image generation models (models that output images) + // Skip image generation models (models that output images). return !rawModel.architecture?.output_modalities?.includes("image") }) .sort((a, b) => a.name.localeCompare(b.name)) diff --git a/packages/evals/package.json b/packages/evals/package.json index 83690a99c4d7..611ee6e79c2d 100644 --- a/packages/evals/package.json +++ b/packages/evals/package.json @@ -15,6 +15,8 @@ "drizzle-kit:production": "dotenvx run -f .env.production -- tsx node_modules/drizzle-kit/bin.cjs", "db:generate": "pnpm drizzle-kit generate", "db:migrate": "pnpm drizzle-kit migrate", + "db:test:migrate": "pnpm drizzle-kit:test migrate", + "db:production:migrate": "pnpm drizzle-kit:production migrate", "db:push": "pnpm drizzle-kit push", "db:test:push": "pnpm drizzle-kit:test push", "db:production:push": "pnpm drizzle-kit:production push", diff --git a/packages/evals/src/db/migrations/0002_bouncy_blazing_skull.sql b/packages/evals/src/db/migrations/0002_bouncy_blazing_skull.sql new file mode 100644 index 000000000000..6f6518bbeaa4 --- /dev/null +++ b/packages/evals/src/db/migrations/0002_bouncy_blazing_skull.sql @@ -0,0 +1,6 @@ +ALTER TABLE "runs" ADD COLUMN "name" text;--> statement-breakpoint +ALTER TABLE "runs" ADD COLUMN "contextWindow" integer;--> statement-breakpoint +ALTER TABLE "runs" ADD COLUMN "inputPrice" real;--> statement-breakpoint +ALTER TABLE "runs" ADD COLUMN "outputPrice" real;--> statement-breakpoint +ALTER TABLE "runs" ADD COLUMN "cacheWritesPrice" real;--> statement-breakpoint +ALTER TABLE "runs" ADD COLUMN "cacheReadsPrice" real; \ No newline at end of file diff --git a/packages/evals/src/db/migrations/meta/0002_snapshot.json b/packages/evals/src/db/migrations/meta/0002_snapshot.json new file mode 100644 index 000000000000..d18be4f6d117 --- /dev/null +++ b/packages/evals/src/db/migrations/meta/0002_snapshot.json @@ -0,0 +1,453 @@ +{ + "id": "3d2b8423-6170-4cb2-9f62-1c86756da97a", + "prevId": "43b197c4-ff4f-48c1-908b-a330e66a162d", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.runs": { + "name": "runs", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "runs_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "task_metrics_id": { + "name": "task_metrics_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "model": { + "name": "model", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "contextWindow": { + "name": "contextWindow", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "inputPrice": { + "name": "inputPrice", + "type": "real", + "primaryKey": false, + "notNull": false + }, + "outputPrice": { + "name": "outputPrice", + "type": "real", + "primaryKey": false, + "notNull": false + }, + "cacheWritesPrice": { + "name": "cacheWritesPrice", + "type": "real", + "primaryKey": false, + "notNull": false + }, + "cacheReadsPrice": { + "name": "cacheReadsPrice", + "type": "real", + "primaryKey": false, + "notNull": false + }, + "settings": { + "name": "settings", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "pid": { + "name": "pid", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "socket_path": { + "name": "socket_path", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "concurrency": { + "name": "concurrency", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 2 + }, + "timeout": { + "name": "timeout", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 5 + }, + "passed": { + "name": "passed", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "failed": { + "name": "failed", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": { + "runs_task_metrics_id_taskMetrics_id_fk": { + "name": "runs_task_metrics_id_taskMetrics_id_fk", + "tableFrom": "runs", + "tableTo": "taskMetrics", + "columnsFrom": ["task_metrics_id"], + "columnsTo": ["id"], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.taskMetrics": { + "name": "taskMetrics", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "taskMetrics_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "tokens_in": { + "name": "tokens_in", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "tokens_out": { + "name": "tokens_out", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "tokens_context": { + "name": "tokens_context", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "cache_writes": { + "name": "cache_writes", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "cache_reads": { + "name": "cache_reads", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "cost": { + "name": "cost", + "type": "real", + "primaryKey": false, + "notNull": true + }, + "duration": { + "name": "duration", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "tool_usage": { + "name": "tool_usage", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.tasks": { + "name": "tasks", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "tasks_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "run_id": { + "name": "run_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "task_metrics_id": { + "name": "task_metrics_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "language": { + "name": "language", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "exercise": { + "name": "exercise", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "passed": { + "name": "passed", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "started_at": { + "name": "started_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "finished_at": { + "name": "finished_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "tasks_language_exercise_idx": { + "name": "tasks_language_exercise_idx", + "columns": [ + { + "expression": "run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "language", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "exercise", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "tasks_run_id_runs_id_fk": { + "name": "tasks_run_id_runs_id_fk", + "tableFrom": "tasks", + "tableTo": "runs", + "columnsFrom": ["run_id"], + "columnsTo": ["id"], + "onDelete": "no action", + "onUpdate": "no action" + }, + "tasks_task_metrics_id_taskMetrics_id_fk": { + "name": "tasks_task_metrics_id_taskMetrics_id_fk", + "tableFrom": "tasks", + "tableTo": "taskMetrics", + "columnsFrom": ["task_metrics_id"], + "columnsTo": ["id"], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.toolErrors": { + "name": "toolErrors", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "toolErrors_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "run_id": { + "name": "run_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "task_id": { + "name": "task_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "tool_name": { + "name": "tool_name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "error": { + "name": "error", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": { + "toolErrors_run_id_runs_id_fk": { + "name": "toolErrors_run_id_runs_id_fk", + "tableFrom": "toolErrors", + "tableTo": "runs", + "columnsFrom": ["run_id"], + "columnsTo": ["id"], + "onDelete": "no action", + "onUpdate": "no action" + }, + "toolErrors_task_id_tasks_id_fk": { + "name": "toolErrors_task_id_tasks_id_fk", + "tableFrom": "toolErrors", + "tableTo": "tasks", + "columnsFrom": ["task_id"], + "columnsTo": ["id"], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + } + }, + "enums": {}, + "schemas": {}, + "sequences": {}, + "roles": {}, + "policies": {}, + "views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} diff --git a/packages/evals/src/db/migrations/meta/_journal.json b/packages/evals/src/db/migrations/meta/_journal.json index e20425b10579..38543557f393 100644 --- a/packages/evals/src/db/migrations/meta/_journal.json +++ b/packages/evals/src/db/migrations/meta/_journal.json @@ -15,6 +15,13 @@ "when": 1753198630651, "tag": "0001_lowly_captain_flint", "breakpoints": true + }, + { + "idx": 2, + "version": "7", + "when": 1757191027855, + "tag": "0002_bouncy_blazing_skull", + "breakpoints": true } ] } diff --git a/packages/evals/src/db/schema.ts b/packages/evals/src/db/schema.ts index 73705ac054db..66588c792c36 100644 --- a/packages/evals/src/db/schema.ts +++ b/packages/evals/src/db/schema.ts @@ -13,7 +13,13 @@ export const runs = pgTable("runs", { id: integer().primaryKey().generatedAlwaysAsIdentity(), taskMetricsId: integer("task_metrics_id").references(() => taskMetrics.id), model: text().notNull(), + name: text(), description: text(), + contextWindow: integer(), + inputPrice: real(), + outputPrice: real(), + cacheWritesPrice: real(), + cacheReadsPrice: real(), settings: jsonb().$type(), pid: integer(), socketPath: text("socket_path").notNull(),