/**
 * Decides whether a message event should be written to the log, suppressing
 * consecutive repeats of the same payload.
 *
 * Entries are keyed by `${action}:${message.ts}` and hold the JSON text of the
 * payload that was last logged for that key. The number of remembered keys is
 * capped at `maxEntries`; when the cap is exceeded the key that was logged
 * longest ago is dropped.
 */
export class MessageLogDeduper {
	/**
	 * Serialized payload last logged for each key. A key is re-inserted every
	 * time it is logged, so Map iteration order runs from least to most
	 * recently logged.
	 */
	private readonly lastLoggedByKey = new Map<string, string>()

	/**
	 * @param maxEntries Maximum number of keys to remember (default 10,000).
	 */
	constructor(private readonly maxEntries = 10_000) {}

	/**
	 * Returns true if this message should be logged.
	 *
	 * Dedupe key: `${action}:${message.ts}`.
	 * Dedupe rule: skip only if the payload serializes to exactly the same JSON
	 * as the payload last logged for that key.
	 *
	 * Anything that cannot be keyed or serialized is always logged: a falsy
	 * `action`, a non-object or null `message`, a non-numeric `ts`, or a
	 * payload for which `JSON.stringify` throws or produces no text.
	 *
	 * @param action Event action; part of the dedupe key.
	 * @param message Event payload; expected to be an object with a numeric `ts`.
	 * @returns false when the message is an exact repeat, true otherwise.
	 */
	public shouldLog(action: string | undefined, message: unknown): boolean {
		if (!action || !message || typeof message !== "object") {
			return true
		}

		const ts = (message as { ts?: unknown }).ts
		if (typeof ts !== "number") {
			return true
		}

		let serialized: string | undefined
		try {
			// The lib typing says `string`, but JSON.stringify yields undefined at
			// runtime when the value has no JSON representation (for example a
			// toJSON() that returns undefined), hence the widened type.
			serialized = JSON.stringify(message) as string | undefined
		} catch {
			// If serialization fails (e.g. circular references), prefer logging.
			return true
		}

		// Without this guard an undefined result would compare equal to the
		// undefined returned by Map.get for an unseen key, and the message
		// would be dropped the first time it appears.
		if (serialized === undefined) {
			return true
		}

		const key = `${action}:${ts}`
		if (this.lastLoggedByKey.get(key) === serialized) {
			return false
		}

		// Delete before set so the key moves to the end of the iteration order;
		// eviction below then removes the key that was logged longest ago.
		this.lastLoggedByKey.delete(key)
		this.lastLoggedByKey.set(key, serialized)

		if (this.lastLoggedByKey.size > this.maxEntries) {
			const oldest = this.lastLoggedByKey.keys().next()
			if (!oldest.done) {
				this.lastLoggedByKey.delete(oldest.value)
			}
		}

		return true
	}
}
const runTask = async ({ run, task, publish, logger, jobToken }: RunTaskO (payload[0].message.say && loggableSays.includes(payload[0].message.say)) || payload[0].message.partial !== true) ) { + // Dedupe identical repeated message events (same message.ts + same payload) + if (eventName === RooCodeEventName.Message) { + const action = payload[0]?.action as string | undefined + const message = payload[0]?.message + if (!messageLogDeduper.shouldLog(action, message)) { + return + } + } + // Extract tool name for tool-related messages for clearer logging let logEventName: string = eventName if (eventName === RooCodeEventName.Message && payload[0]?.message?.ask === "tool") {