Skip to content
Merged
Show file tree
Hide file tree
Changes from 84 commits
Commits
Show all changes
115 commits
Select commit Hold shift + click to select a range
65c750d
Changeset version bump (#9856)
github-actions[bot] Dec 5, 2025
5c50160
Better error logs for parseToolCall exceptions (#9857)
cte Dec 5, 2025
642a187
(update): Add DeepSeek V3-2 Support for Baseten Provider (#9861)
AlexKer Dec 5, 2025
d285d01
web: Product pages (#9865)
brunobergher Dec 5, 2025
9f4dcfc
fix: sanitize removed/invalid API providers to prevent infinite loop …
hannesrudolph Dec 5, 2025
9d5eca9
Update xAI models catalog (#9872)
hannesrudolph Dec 5, 2025
dd92453
refactor: decouple tools from system prompt (#9784)
daniel-lxs Dec 5, 2025
4a5cbcb
Stop making count_tokens requests (#9884)
mrubens Dec 6, 2025
2eae321
Default to using native tools when supported on openrouter (#9878)
mrubens Dec 6, 2025
946fd03
feat: change defaultToolProtocol default from xml to native (#9892)
Dec 6, 2025
8aa1346
Refactor: Unified context-management architecture with improved UX (#…
hannesrudolph Dec 7, 2025
1f7e1ee
Make eval runs deleteable (#9909)
mrubens Dec 8, 2025
bea7626
fix: add Kimi, MiniMax, and Qwen model configurations for Bedrock (#9…
Dec 8, 2025
1370cb0
fix: use foreground color for context-management icons (#9912)
hannesrudolph Dec 8, 2025
efbf427
feat: add xhigh reasoning effort for gpt-5.1-codex-max (#9900)
andrewginns Dec 8, 2025
fba8508
feat: add search_replace native tool for single-replacement operation…
hannesrudolph Dec 8, 2025
6f602fc
Improve cloud job error logging for RCC provider errors (#9924)
cte Dec 8, 2025
754b701
feat: configure tool preferences for xAI models (#9923)
hannesrudolph Dec 8, 2025
88a0bed
fix: process finish_reason to emit tool_call_end events (#9927)
daniel-lxs Dec 8, 2025
ee48b3a
fix: suppress 'ask promise was ignored' error in handleError (#9914)
daniel-lxs Dec 9, 2025
375c103
fix: exclude apply_diff from native tools when diffEnabled is false (…
Dec 9, 2025
93a43e4
Try to make OpenAI errors more useful (#9639)
mrubens Dec 9, 2025
de00ab1
refactor: consolidate ThinkingBudget components and fix disable handl…
hannesrudolph Dec 9, 2025
3356267
Add timeout to OpenAI Compatible Provider Client (#9898)
dcbartlett Dec 9, 2025
2efebf5
fix: add finish_reason processing to xai.ts provider (#9929)
daniel-lxs Dec 9, 2025
5bde2e5
Remove defaultTemperature from Roo provider configuration (#9932)
mrubens Dec 9, 2025
54a5265
feat: forbid time estimates in architect mode (#9931)
Dec 9, 2025
c103a4a
feat: streaming tool stats + token usage throttling (#9926)
hannesrudolph Dec 9, 2025
8a98f14
feat: Make Architect save to `/plans` and gitignore it (#9944)
brunobergher Dec 9, 2025
e142906
feat: add announcement support CTA and social icons (#9945)
hannesrudolph Dec 9, 2025
f89a6be
fix: display actual API error message instead of generic text on retr…
hannesrudolph Dec 9, 2025
83787a7
feat(roo): add versioned settings support with minPluginVersion gatin…
hannesrudolph Dec 9, 2025
0068d1f
Revert "feat: change defaultToolProtocol default from xml to native" …
mrubens Dec 9, 2025
4608c97
fix: return undefined instead of 0 for disabled API timeout (#9960)
hannesrudolph Dec 9, 2025
1898848
feat(deepseek): update DeepSeek models to V3.2 with new pricing (#9962)
hannesrudolph Dec 9, 2025
721b02e
Add a way to save screenshots from the browser tool (#9963)
mrubens Dec 10, 2025
ada7411
Tweaks to baseten model definitions (#9866)
mrubens Dec 10, 2025
29d6f6d
fix: always show tool protocol selector for openai-compatible (#9966)
hannesrudolph Dec 10, 2025
24eb6ae
feat: add API error telemetry to OpenRouter provider (#9953)
daniel-lxs Dec 10, 2025
f472a82
fix: validate and fix tool_result IDs before API requests (#9952)
daniel-lxs Dec 10, 2025
df5fdef
fix: respect explicit supportsReasoningEffort array values (#9970)
hannesrudolph Dec 10, 2025
0cf5b28
v3.36.3 (#9972)
cte Dec 10, 2025
048e7f3
feat(gemini): add minimal and medium reasoning effort levels (#9973)
hannesrudolph Dec 10, 2025
03912d8
Delete changeset files (#9977)
cte Dec 10, 2025
36ef603
Add missing release notes for v3.36.3 (#9979)
cte Dec 10, 2025
ab18bf3
feat: add error details modal with on-demand display (#9985)
Dec 10, 2025
e092e77
Fix: Correct TODO list display order in chat view (ROO-107) (#9991)
Dec 10, 2025
5a4315f
fix: prevent premature rawChunkTracker clearing for MCP tools (#9993)
daniel-lxs Dec 10, 2025
fda020a
fix: filter out 429 rate limit errors from API error telemetry (#9987)
daniel-lxs Dec 10, 2025
2cd772c
Release v3.36.4 (#9994)
cte Dec 10, 2025
380a578
Changeset version bump (#9995)
github-actions[bot] Dec 10, 2025
1cf6ae6
feat(telemetry): add app version to exception captures and filter 402…
daniel-lxs Dec 10, 2025
f05dd59
Remove Glama provider (#9801)
hannesrudolph Dec 10, 2025
2a70a2e
@roo-code/types v1.90.0 (#9998)
cte Dec 10, 2025
483e70c
fix: apply versioned settings on nightly builds (#9997)
hannesrudolph Dec 10, 2025
0cbaed7
feat: add toggle for Enter key behavior in chat input (#10002)
hannesrudolph Dec 11, 2025
6a30d94
chore: remove list_code_definition_names tool (#10005)
hannesrudolph Dec 11, 2025
a1d3a43
Update roomotes.yml (#10008)
cte Dec 11, 2025
f9cfc66
fix: add general API endpoints for Z.ai provider (#9894)
Dec 11, 2025
47320dc
fix: handle empty Gemini responses and reasoning loops (#10007)
hannesrudolph Dec 11, 2025
8731709
fix: add missing tool_result blocks to prevent API errors (#10015)
daniel-lxs Dec 11, 2025
51dbccf
feat: add gpt-5.2 model to openai-native provider (#10024)
hannesrudolph Dec 11, 2025
8a68b04
fix: filter orphaned tool_results when more results than tool_uses (#…
daniel-lxs Dec 11, 2025
526e195
Release v3.36.5 (#10029)
cte Dec 11, 2025
21c2d93
Changeset version bump (#10032)
github-actions[bot] Dec 11, 2025
c513df5
fix: merge settings and versionedSettings for Roo provider models (#1…
hannesrudolph Dec 11, 2025
7766b91
Revert "fix: merge settings and versionedSettings for Roo provider mo…
cte Dec 11, 2025
5072ff1
Revert the 3.36.5 release (we halted it) (#10036)
cte Dec 11, 2025
4dabd52
Release v3.36.5 (#10037)
cte Dec 11, 2025
f97b515
Changeset version bump (#10038)
github-actions[bot] Dec 11, 2025
495b5c6
ux: improve auto-approve timer visibility in follow-up suggestions (#…
brunobergher Dec 12, 2025
d976a9b
fix: cancel auto-approval timeout when user starts typing (#9937)
Dec 12, 2025
23a214c
fix: extract raw error message from OpenRouter metadata (#10039)
daniel-lxs Dec 12, 2025
ba7c553
feat: add tool alias support for model-specific tool customization (#…
daniel-lxs Dec 12, 2025
0f8fac9
fix: show tool protocol dropdown for LiteLLM provider (#10053)
daniel-lxs Dec 12, 2025
8da4d3d
feat: add WorkspaceTaskVisibility type for organization cloud setting…
Dec 12, 2025
f60c14e
Release: v1.91.0 (#10055)
jr Dec 12, 2025
3521270
feat: sanitize MCP server/tool names for API compatibility (#10054)
daniel-lxs Dec 12, 2025
0742335
Release v3.36.6 (#10057)
cte Dec 12, 2025
f961b73
changeset version bump
github-actions[bot] Dec 12, 2025
0b112ce
Update CHANGELOG.md
cte Dec 12, 2025
93e8ed3
Merge branch 'upstream-at-v3.36.6' into roo-v3.36.6
kevinvandijk Dec 16, 2025
e8792c8
Revive glama which was deleted upstream
kevinvandijk Dec 16, 2025
108a4f4
Fix type for xhigh reasoning
kevinvandijk Dec 16, 2025
2ffd221
Fix types
kevinvandijk Dec 16, 2025
7b17ae2
Fix typing in tests
kevinvandijk Dec 16, 2025
aad9fed
Fix remaining merge conflicts
kevinvandijk Dec 16, 2025
878703c
Update locales
kevinvandijk Dec 17, 2025
2da872f
Fix build failing in file we don't care about
kevinvandijk Dec 17, 2025
8ad3df4
Fix throttle events
kevinvandijk Dec 17, 2025
dc73bb5
Fix snapshots
kevinvandijk Dec 17, 2025
713c811
Fix mock in unit test
kevinvandijk Dec 17, 2025
03de7b4
Update snapshots because we use a different default mode
kevinvandijk Dec 17, 2025
2fbc511
Fix more mocks and skip tests we don't need
kevinvandijk Dec 17, 2025
a6c0036
Re-add countTokens for anthropic handler because of our custom changes
kevinvandijk Dec 17, 2025
cf70498
Disable irrelevant tests
kevinvandijk Dec 17, 2025
31bef3d
Resolve circular dependencies causing failing tests
kevinvandijk Dec 17, 2025
fa53ec9
Re-add Morph Fast Apply to refactored tools
kevinvandijk Dec 17, 2025
8668b72
Re-add compatibility with Morph Fast Apply to refactored native tools
kevinvandijk Dec 17, 2025
0379019
Merge branch 'main' into roo-v3.36.6
kevinvandijk Dec 17, 2025
e5c0687
Disable tests because we have a different method
kevinvandijk Dec 17, 2025
4eea059
Adjust snapshots for fast apply rule
kevinvandijk Dec 17, 2025
2d4a07e
Fix webview test
kevinvandijk Dec 18, 2025
1d2e27e
Align test to actual expected behavior
kevinvandijk Dec 18, 2025
4ca152d
Merge branch 'main' into roo-v3.36.6
kevinvandijk Dec 18, 2025
8a9fddd
Add changeset
kevinvandijk Dec 18, 2025
4adeb64
Set Anthropic headers when calling OpenRouter
chrarnoldus Dec 18, 2025
02baa58
Parse versioned model settings
chrarnoldus Dec 18, 2025
59d348b
Add comment
chrarnoldus Dec 18, 2025
b85c463
Also fix in the other place this is duplicated
chrarnoldus Dec 18, 2025
24e12b5
Merge pull request #4555 from Kilo-Org/christiaan/parse-versioned-set…
chrarnoldus Dec 18, 2025
04fa140
Merge pull request #4554 from Kilo-Org/christiaan/ant-headers
chrarnoldus Dec 18, 2025
7345904
Fix unit tests for added headers
kevinvandijk Dec 18, 2025
1c77e28
Merge branch 'main' into roo-v3.36.6
kevinvandijk Dec 18, 2025
2dd1b27
Update locales
kevinvandijk Dec 18, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,6 @@ qdrant_storage/
*.code-workspace

# Act Secret Files
.secrets
.secrets
# Architect plans
plans/
111 changes: 82 additions & 29 deletions apps/web-evals/src/app/runs/[id]/run.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ function formatLogContent(log: string): React.ReactNode[] {

export function Run({ run }: { run: Run }) {
const runStatus = useRunStatus(run)
const { tasks, tokenUsage, usageUpdatedAt, heartbeat, runners } = runStatus
const { tasks, tokenUsage, toolUsage, usageUpdatedAt, heartbeat, runners } = runStatus

const [selectedTask, setSelectedTask] = useState<Task | null>(null)
const [taskLog, setTaskLog] = useState<string | null>(null)
Expand Down Expand Up @@ -336,37 +336,70 @@ export function Run({ run }: { run: Run }) {
)

const taskMetrics: Record<number, TaskMetrics> = useMemo(() => {
// Reference usageUpdatedAt to trigger recomputation when Map contents change
void usageUpdatedAt
const metrics: Record<number, TaskMetrics> = {}

tasks?.forEach((task) => {
const usage = tokenUsage.get(task.id)

if (task.finishedAt && task.taskMetrics) {
metrics[task.id] = task.taskMetrics
} else if (usage) {
const streamingUsage = tokenUsage.get(task.id)
const dbMetrics = task.taskMetrics

// For finished tasks, prefer DB values but fall back to streaming values
// This handles race conditions during timeout where DB might not have latest data
if (task.finishedAt) {
// Check if DB metrics have meaningful values (not just default/empty)
const dbHasData = dbMetrics && (dbMetrics.tokensIn > 0 || dbMetrics.tokensOut > 0 || dbMetrics.cost > 0)
if (dbHasData) {
metrics[task.id] = dbMetrics
} else if (streamingUsage) {
// Fall back to streaming values if DB is empty/stale
metrics[task.id] = {
tokensIn: streamingUsage.totalTokensIn,
tokensOut: streamingUsage.totalTokensOut,
tokensContext: streamingUsage.contextTokens,
duration: streamingUsage.duration ?? 0,
cost: streamingUsage.totalCost,
}
}
} else if (streamingUsage) {
// For running tasks, use streaming values
metrics[task.id] = {
tokensIn: usage.totalTokensIn,
tokensOut: usage.totalTokensOut,
tokensContext: usage.contextTokens,
duration: usage.duration ?? 0,
cost: usage.totalCost,
tokensIn: streamingUsage.totalTokensIn,
tokensOut: streamingUsage.totalTokensOut,
tokensContext: streamingUsage.contextTokens,
duration: streamingUsage.duration ?? 0,
cost: streamingUsage.totalCost,
}
}
})

return metrics
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [tasks, tokenUsage, usageUpdatedAt])

// Collect all unique tool names from all tasks and sort by total attempts
const toolColumns = useMemo<ToolName[]>(() => {
// Reference usageUpdatedAt to trigger recomputation when Map contents change
void usageUpdatedAt
if (!tasks) return []

const toolTotals = new Map<ToolName, number>()

for (const task of tasks) {
if (task.taskMetrics?.toolUsage) {
for (const [toolName, usage] of Object.entries(task.taskMetrics.toolUsage)) {
// Get both DB and streaming values
const dbToolUsage = task.taskMetrics?.toolUsage
const streamingToolUsage = toolUsage.get(task.id)

// For finished tasks, prefer DB values but fall back to streaming values
// For running tasks, use streaming values
// This handles race conditions during timeout where DB might not have latest data
const taskToolUsage = task.finishedAt
? dbToolUsage && Object.keys(dbToolUsage).length > 0
? dbToolUsage
: streamingToolUsage
: streamingToolUsage

if (taskToolUsage) {
for (const [toolName, usage] of Object.entries(taskToolUsage)) {
const tool = toolName as ToolName
const current = toolTotals.get(tool) ?? 0
toolTotals.set(tool, current + usage.attempts)
Expand All @@ -378,10 +411,13 @@ export function Run({ run }: { run: Run }) {
return Array.from(toolTotals.entries())
.sort((a, b) => b[1] - a[1])
.map(([name]): ToolName => name)
}, [tasks])
// toolUsage ref is stable; usageUpdatedAt triggers recomputation when Map contents change
}, [tasks, toolUsage, usageUpdatedAt])

// Compute aggregate stats
const stats = useMemo(() => {
// Reference usageUpdatedAt to trigger recomputation when Map contents change
void usageUpdatedAt
if (!tasks) return null

const passed = tasks.filter((t) => t.passed === true).length
Expand All @@ -393,8 +429,8 @@ export function Run({ run }: { run: Run }) {
let totalCost = 0
let totalDuration = 0

// Aggregate tool usage from completed tasks
const toolUsage: ToolUsage = {}
// Aggregate tool usage from all tasks (both finished and running)
const toolUsageAggregate: ToolUsage = {}

for (const task of tasks) {
const metrics = taskMetrics[task.id]
Expand All @@ -405,15 +441,24 @@ export function Run({ run }: { run: Run }) {
totalDuration += metrics.duration
}

// Aggregate tool usage from finished tasks with taskMetrics
if (task.finishedAt && task.taskMetrics?.toolUsage) {
for (const [key, usage] of Object.entries(task.taskMetrics.toolUsage)) {
// Aggregate tool usage: prefer DB values for finished tasks, fall back to streaming values
// This handles race conditions during timeout where DB might not have latest data
const dbToolUsage = task.taskMetrics?.toolUsage
const streamingToolUsage = toolUsage.get(task.id)
const taskToolUsage = task.finishedAt
? dbToolUsage && Object.keys(dbToolUsage).length > 0
? dbToolUsage
: streamingToolUsage
: streamingToolUsage

if (taskToolUsage) {
for (const [key, usage] of Object.entries(taskToolUsage)) {
const tool = key as keyof ToolUsage
if (!toolUsage[tool]) {
toolUsage[tool] = { attempts: 0, failures: 0 }
if (!toolUsageAggregate[tool]) {
toolUsageAggregate[tool] = { attempts: 0, failures: 0 }
}
toolUsage[tool].attempts += usage.attempts
toolUsage[tool].failures += usage.failures
toolUsageAggregate[tool].attempts += usage.attempts
toolUsageAggregate[tool].failures += usage.failures
}
}
}
Expand All @@ -427,13 +472,15 @@ export function Run({ run }: { run: Run }) {
totalTokensOut,
totalCost,
totalDuration,
toolUsage,
toolUsage: toolUsageAggregate,
}
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [tasks, taskMetrics, tokenUsage, usageUpdatedAt])
// Map refs are stable; usageUpdatedAt triggers recomputation when Map contents change
}, [tasks, taskMetrics, toolUsage, usageUpdatedAt])

// Calculate elapsed time (wall-clock time from run creation to completion or now)
const elapsedTime = useMemo(() => {
// Reference usageUpdatedAt to trigger recomputation for live elapsed time updates
void usageUpdatedAt
if (!tasks || tasks.length === 0) return null

const startTime = new Date(run.createdAt).getTime()
Expand All @@ -452,7 +499,6 @@ export function Run({ run }: { run: Run }) {

// If still running, use current time
return Date.now() - startTime
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [tasks, run.createdAt, run.taskMetricsId, usageUpdatedAt])

return (
Expand Down Expand Up @@ -655,7 +701,14 @@ export function Run({ run }: { run: Run }) {
{formatTokens(taskMetrics[task.id]!.tokensContext)}
</TableCell>
{toolColumns.map((toolName) => {
const usage = task.taskMetrics?.toolUsage?.[toolName]
// Use DB values for finished tasks, but fall back to streaming values
// if DB values are missing (handles race condition during timeout)
const dbUsage = task.taskMetrics?.toolUsage?.[toolName]
const streamingUsage = toolUsage.get(task.id)?.[toolName]
const usage = task.finishedAt
? (dbUsage ?? streamingUsage)
: streamingUsage

const successRate =
usage && usage.attempts > 0
? ((usage.attempts - usage.failures) / usage.attempts) * 100
Expand Down
11 changes: 10 additions & 1 deletion apps/web-evals/src/hooks/use-run-status.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { useState, useCallback, useRef } from "react"
import { useQuery, keepPreviousData } from "@tanstack/react-query"

import { type TokenUsage, RooCodeEventName, taskEventSchema } from "@roo-code/types"
import { type TokenUsage, type ToolUsage, RooCodeEventName, taskEventSchema } from "@roo-code/types"
import type { Run, Task, TaskMetrics } from "@roo-code/evals"

import { getHeartbeat } from "@/actions/heartbeat"
Expand All @@ -15,6 +15,7 @@ export type RunStatus = {
runners: string[] | undefined
tasks: (Task & { taskMetrics: TaskMetrics | null })[] | undefined
tokenUsage: Map<number, TokenUsage & { duration?: number }>
toolUsage: Map<number, ToolUsage>
usageUpdatedAt: number | undefined
}

Expand All @@ -23,6 +24,7 @@ export const useRunStatus = (run: Run): RunStatus => {
const [usageUpdatedAt, setUsageUpdatedAt] = useState<number>()

const tokenUsage = useRef<Map<number, TokenUsage & { duration?: number }>>(new Map())
const toolUsage = useRef<Map<number, ToolUsage>>(new Map())
const startTimes = useRef<Map<number, number>>(new Map())

const { data: heartbeat } = useQuery({
Expand Down Expand Up @@ -78,6 +80,12 @@ export const useRunStatus = (run: Run): RunStatus => {
const startTime = startTimes.current.get(taskId)
const duration = startTime ? Date.now() - startTime : undefined
tokenUsage.current.set(taskId, { ...payload[1], duration })

// Track tool usage from streaming updates
if (payload[2]) {
toolUsage.current.set(taskId, payload[2])
}

setUsageUpdatedAt(Date.now())
break
}
Expand All @@ -96,6 +104,7 @@ export const useRunStatus = (run: Run): RunStatus => {
runners,
tasks,
tokenUsage: tokenUsage.current,
toolUsage: toolUsage.current,
usageUpdatedAt,
}
}
5 changes: 5 additions & 0 deletions apps/web-roo-code/next.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ const nextConfig: NextConfig = {
destination: "https://roo-code.notion.site/238fd1401b0a8087b858e1ad431507cf?pvs=105",
permanent: false,
},
{
source: "/provider/pricing",
destination: "/provider",
permanent: true,
},
]
},
}
Expand Down
Binary file modified apps/web-roo-code/public/heroes/cloud-screen.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Loading