diff --git a/apps/web/src/domains/chat/inspector/compaction-trail-api.ts b/apps/web/src/domains/chat/inspector/compaction-trail-api.ts index c9be8423008..e0e3b490fd3 100644 --- a/apps/web/src/domains/chat/inspector/compaction-trail-api.ts +++ b/apps/web/src/domains/chat/inspector/compaction-trail-api.ts @@ -2,10 +2,12 @@ * React Query hook for the Compaction tab. * * **Call-scoped.** The hook fetches the set of compaction events that - * led up to a specific LLM call — not the entire conversation. Picking - * a different call in the rail produces a different trail (cache key - * varies on `callId`), so the question "what happened to the context - * before this call ran?" gets a focused answer. + * ran in the open window between the previous non-`compactionAgent` + * LLM call and the selected call — not the entire conversation. + * Picking a different call in the rail produces a different trail + * (cache key varies on `callId`), so the question "what did the + * compactor do to my context before *this specific* call ran?" gets a + * focused answer. * * Lazy-load contract: the underlying `queryFn` only fires when the * tab is mounted (i.e. selected). Callers should not invoke this from @@ -14,26 +16,17 @@ * never triggers the fetch. `staleTime` matches the rest of the * inspector hooks (30s) so re-selecting the tab inside that window * serves from cache without a re-fetch. - * - * Today the `queryFn` resolves to `fetchCompactionTrailMock`. When - * the daemon ships a real route, swap the import — the response shape - * is pinned by `CompactionTrailResponse` in `compaction-trail-types.ts`. 
*/ import { queryOptions, useQuery } from "@tanstack/react-query"; -import { fetchCompactionTrailMock } from "./compaction-trail-mock"; import type { CompactionTrailResponse } from "./compaction-trail-types"; +import { + CompactionTrailRequestError, + fetchCompactionTrail, +} from "./compaction-trail-fetch"; -export class CompactionTrailRequestError extends Error { - status: number; - - constructor(status: number, message: string) { - super(message); - this.name = "CompactionTrailRequestError"; - this.status = status; - } -} +export { CompactionTrailRequestError }; export function compactionTrailQueryOptions( assistantId: string | undefined, @@ -60,13 +53,12 @@ export function compactionTrailQueryOptions( if (!callId) { throw new CompactionTrailRequestError(0, "Missing callId"); } - // TODO: replace with real daemon fetch once the route exists: - // GET /v1/assistants/{assistantId}/conversations/{conversationId}/compaction - // ?callId={callId} - // Returns the same `CompactionTrailResponse` shape this mock does - // — see `compaction-trail-types.ts`. The daemon scopes the result - // server-side to compactions that happened before the call ran. - return await fetchCompactionTrailMock(conversationId, callId, signal); + return await fetchCompactionTrail( + assistantId, + conversationId, + callId, + signal, + ); }, enabled, staleTime: 30_000, diff --git a/apps/web/src/domains/chat/inspector/compaction-trail-fetch.test.ts b/apps/web/src/domains/chat/inspector/compaction-trail-fetch.test.ts new file mode 100644 index 00000000000..e322c86584c --- /dev/null +++ b/apps/web/src/domains/chat/inspector/compaction-trail-fetch.test.ts @@ -0,0 +1,168 @@ +/** + * Tests for the Compaction Trail real fetcher. + * + * Spies on `client.get` rather than `mock.module`-ing the whole SDK, + * matching the pattern in `apps/web/src/domains/chat/api/messages.test.ts` + * — keeps the module registry clean for sibling test files. 
+ *
+ * What's pinned:
+ *   - URL pattern + path params + `callId` query reach the SDK
+ *     exactly (the daemon route is hand-rolled, not generated, so
+ *     drift here would silently 404).
+ *   - The abort signal is forwarded so React Query can cancel
+ *     in-flight requests when the tab unmounts.
+ *   - HTTP failures raise `CompactionTrailRequestError` with the
+ *     status code — the Compaction tab branches on `error.status`.
+ *   - Malformed payloads raise the same error type with status `0`
+ *     rather than silently returning an `events: []` trail.
+ */
+
+import {
+  afterEach,
+  beforeEach,
+  describe,
+  expect,
+  mock,
+  test,
+} from "bun:test";
+
+import { client } from "@/domains/chat/api/client";
+
+import {
+  CompactionTrailRequestError,
+  fetchCompactionTrail,
+} from "./compaction-trail-fetch";
+import type { CompactionTrailResponse } from "./compaction-trail-types";
+
+type CapturedGetOptions = { // the request options these tests inspect on the stubbed call
+  url: string;
+  path?: Record<string, unknown>;
+  query?: Record<string, unknown>;
+  signal?: AbortSignal;
+};
+
+let captured: CapturedGetOptions | null = null; // options of the latest stubbed `client.get` call; reset before each test
+let nextGetResult: { data: unknown; error: unknown; response: Response }; // what the stub resolves with; tests overwrite it to simulate failures
+const originalGet = client.get; // real implementation, put back in afterEach
+
+const SAMPLE_RESPONSE: CompactionTrailResponse = {
+  conversationId: "conv-abc",
+  events: [
+    {
+      id: "compaction-1",
+      createdAt: Date.parse("2026-05-26T22:19:11Z"),
+      model: "claude-sonnet-4-5",
+      provider: "anthropic",
+      inputTokens: 184_000,
+      outputTokens: 4_800,
+      durationMs: null,
+      responsePreview: "Picked up the New Conversation 404 Bug thread.",
+      requestMessageCount: 130,
+      stopReason: "end_turn",
+      estimatedCostUsd: 0.62,
+    },
+  ],
+};
+
+beforeEach(() => {
+  captured = null;
+  nextGetResult = { // default: a successful 200 carrying SAMPLE_RESPONSE
+    data: SAMPLE_RESPONSE,
+    error: null,
+    response: new Response(null, { status: 200 }),
+  };
+  client.get = mock(async (options: CapturedGetOptions) => {
+    captured = options;
+    return nextGetResult; // read at call time, so a test may reassign it after beforeEach
+  }) as typeof client.get;
+});
+
+afterEach(() => {
+  client.get = originalGet;
+});
+
+describe("fetchCompactionTrail", () => {
+  test("calls the assistant route with the platform path + query params", async () => {
+    await fetchCompactionTrail(
+      "assistant-1",
+      "conv-abc",
+      "call-32",
+      undefined,
+    );
+
+    expect(captured).not.toBeNull(); // null here means the stubbed client.get was never invoked
+    expect(captured!.url).toBe(
+      "/v1/assistants/{assistant_id}/conversations/{conversation_id}/compaction",
+    );
+    expect(captured!.path).toEqual({
+      assistant_id: "assistant-1",
+      conversation_id: "conv-abc",
+    });
+    expect(captured!.query).toEqual({ callId: "call-32" });
+  });
+
+  test("forwards the abort signal so React Query can cancel", async () => {
+    const controller = new AbortController();
+    await fetchCompactionTrail(
+      "assistant-1",
+      "conv-abc",
+      "call-32",
+      controller.signal,
+    );
+    expect(captured!.signal).toBe(controller.signal); // identity check: the very same signal object must reach the SDK
+  });
+
+  test("resolves with the response body on a 200", async () => {
+    const result = await fetchCompactionTrail(
+      "assistant-1",
+      "conv-abc",
+      "call-32",
+      undefined,
+    );
+    expect(result.conversationId).toBe("conv-abc");
+    expect(result.events).toHaveLength(1);
+    expect(result.events[0]?.id).toBe("compaction-1"); // `?.` keeps the index access valid if unchecked indexing is disallowed
+  });
+
+  test("throws CompactionTrailRequestError with the HTTP status on non-OK", async () => {
+    nextGetResult = {
+      data: null,
+      error: { detail: "not found" },
+      response: new Response(null, { status: 404 }),
+    };
+
+    try {
+      await fetchCompactionTrail(
+        "assistant-1",
+        "conv-abc",
+        "call-32",
+        undefined,
+      );
+      throw new Error("expected fetchCompactionTrail to throw");
+    } catch (err) { // the sentinel Error above lands here too and fails the instanceof check
+      expect(err).toBeInstanceOf(CompactionTrailRequestError);
+      expect((err as CompactionTrailRequestError).status).toBe(404);
+    }
+  });
+
+  test("throws CompactionTrailRequestError(0) when the body is malformed", async () => {
+    nextGetResult = {
+      data: { conversationId: "conv-abc" }, // missing `events`
+      error: null,
+      response: new Response(null, { status: 200 }),
+    };
+
+    try {
+      await fetchCompactionTrail(
+        "assistant-1",
+        "conv-abc",
+        "call-32",
+        undefined,
+      );
+      throw new Error("expected
fetchCompactionTrail to throw");
+    } catch (err) { // the sentinel Error thrown inside the try lands here too and fails the instanceof check
+      expect(err).toBeInstanceOf(CompactionTrailRequestError);
+      expect((err as CompactionTrailRequestError).status).toBe(0);
+    }
+  });
+});
diff --git a/apps/web/src/domains/chat/inspector/compaction-trail-fetch.ts b/apps/web/src/domains/chat/inspector/compaction-trail-fetch.ts
new file mode 100644
index 00000000000..94fba8e01df
--- /dev/null
+++ b/apps/web/src/domains/chat/inspector/compaction-trail-fetch.ts
@@ -0,0 +1,81 @@
+/**
+ * Real fetcher for the Compaction tab.
+ *
+ * Talks to the assistant's per-conversation route at
+ * `GET /v1/assistants/{assistantId}/conversations/{conversationId}/compaction?callId=…`,
+ * routed via the platform's `RuntimeProxyWildcardView`. Handler:
+ * `assistant/src/runtime/routes/conversation-compaction-routes.ts`.
+ *
+ * The assistant scopes the result **server-side** to the open window
+ * between the previous non-`compactionAgent` LLM call and the call
+ * identified by `callId` — picking a different call in the rail
+ * produces a different trail. See the route's doc comment for the
+ * floor/ceiling semantics.
+ *
+ * No generated SDK function exists for this route yet (the
+ * OpenAPI regen hasn't picked it up). We call `client.get` directly
+ * with the URL pattern + path/query params, matching sibling
+ * inspector hand-rolled fetchers (`fetchConversationMessages`,
+ * `archiveConversation`).
+ */
+
+import { client, SDK_BASE_OPTIONS } from "@/domains/chat/api/client";
+import { assertHasResponse } from "@/lib/api-errors";
+
+import type { CompactionTrailResponse } from "./compaction-trail-types";
+
+export class CompactionTrailRequestError extends Error { // failure type thrown by the compaction-trail fetcher
+  status: number; // HTTP status of the failed response; 0 when no HTTP status applies (e.g. malformed body)
+
+  constructor(status: number, message: string) {
+    super(message);
+    this.name = "CompactionTrailRequestError"; // replaces the inherited default name "Error"
+    this.status = status;
+  }
+}
+
+/**
+ * Type guard for the wire shape returned by the assistant route.
The
+ * `client.get` call is typed but `data` is still `unknown` on the wire
+ * — narrow defensively rather than trusting the generic.
+ */
+function isCompactionTrailResponse(
+  value: unknown,
+): value is CompactionTrailResponse {
+  if (!value || typeof value !== "object") return false; // rejects null, undefined and primitives
+  const v = value as Record<string, unknown>;
+  return typeof v.conversationId === "string" && Array.isArray(v.events); // the elements of `events` are not inspected
+}
+/** Fetches the compaction trail scoped to `callId`; rejects with `CompactionTrailRequestError` (HTTP status on a non-OK response, 0 on a malformed body). */
+export async function fetchCompactionTrail(
+  assistantId: string, // fills `{assistant_id}` in the route
+  conversationId: string, // fills `{conversation_id}` in the route
+  callId: string, // sent as the `callId` query parameter
+  signal: AbortSignal | undefined, // forwarded to the SDK so the caller can cancel
+): Promise<CompactionTrailResponse> {
+  const { data, error, response } = await client.get<
+    CompactionTrailResponse,
+    unknown
+  >({
+    ...SDK_BASE_OPTIONS,
+    url: "/v1/assistants/{assistant_id}/conversations/{conversation_id}/compaction",
+    path: { assistant_id: assistantId, conversation_id: conversationId },
+    query: { callId },
+    signal,
+    throwOnError: false, // failures are inspected below and rethrown as CompactionTrailRequestError
+  });
+  assertHasResponse(response, error, "Failed to fetch compaction trail"); // NOTE(review): presumably throws when no HTTP response arrived — confirm in @/lib/api-errors
+  if (!response.ok) {
+    throw new CompactionTrailRequestError(
+      response.status,
+      `Compaction trail request failed (HTTP ${response.status})`,
+    );
+  }
+  if (!isCompactionTrailResponse(data)) {
+    throw new CompactionTrailRequestError(
+      0, // the HTTP exchange succeeded; only the payload is unusable
+      "Compaction trail response was malformed",
+    );
+  }
+  return data;
+}
diff --git a/apps/web/src/domains/chat/inspector/compaction-trail-mock.test.ts b/apps/web/src/domains/chat/inspector/compaction-trail-mock.test.ts
deleted file mode 100644
index 7f1889ca800..00000000000
--- a/apps/web/src/domains/chat/inspector/compaction-trail-mock.test.ts
+++ /dev/null
@@ -1,127 +0,0 @@
-/**
- * Sanity tests for the compaction-trail mock module.
- *
- * The mock simulates a real network call (latency + abort signal),
- * so the rest of the inspector can develop against a realistic
- * loading state. These tests pin:
- *
- *   - The simulated fetch resolves with a chronologically-ordered list.
- * - At least one event has a non-`end_turn` stop reason so the UI's - * error-state path is exercised in development. - * - Aborting the signal rejects with an AbortError instead of - * leaving the promise hanging. - * - The mock is **call-scoped**: different callIds yield different - * event counts (deterministically), including the empty case so - * the empty-state UI exercises in dev too. - * - * When the real API ships, this file deletes alongside the mock — - * the API tests take over. - * - * Hard-coded callIds below are precomputed against the deterministic - * hash in `mockEventCountForCallId`. If MOCK_EVENTS length changes, - * regenerate them — there's no looser way to assert "this id yields - * N events" without coupling tests to the hash function. - */ - -import { describe, expect, test } from "bun:test"; - -import { fetchCompactionTrailMock } from "./compaction-trail-mock"; - -// `call-4` hashes to the max bucket (5 events) and `call-3` hashes to -// the empty bucket (0 events) — pinned to the current MOCK_EVENTS -// length of 5. 
-const FULL_TRAIL_CALL_ID = "call-4"; -const EMPTY_TRAIL_CALL_ID = "call-3"; - -describe("fetchCompactionTrailMock", () => { - test("resolves with a non-empty, chronologically-ordered event list", async () => { - const result = await fetchCompactionTrailMock( - "conv-abc", - FULL_TRAIL_CALL_ID, - undefined, - ); - - expect(result.conversationId).toBe("conv-abc"); - expect(result.events.length).toBeGreaterThan(0); - - for (let i = 1; i < result.events.length; i++) { - const prev = result.events[i - 1]!; - const curr = result.events[i]!; - expect(curr.createdAt).toBeGreaterThanOrEqual(prev.createdAt); - } - }); - - test("includes at least one failure event to exercise the error UI", async () => { - const result = await fetchCompactionTrailMock( - "conv-abc", - FULL_TRAIL_CALL_ID, - undefined, - ); - - const failures = result.events.filter( - (e) => e.stopReason != null && e.stopReason !== "end_turn", - ); - expect(failures.length).toBeGreaterThanOrEqual(1); - }); - - test("rejects with AbortError when the signal aborts mid-fetch", async () => { - const controller = new AbortController(); - const promise = fetchCompactionTrailMock( - "conv-abc", - FULL_TRAIL_CALL_ID, - controller.signal, - ); - // Abort before the simulated latency resolves. 
- controller.abort(); - - let caught: unknown = null; - try { - await promise; - } catch (err) { - caught = err; - } - expect(caught).toBeInstanceOf(DOMException); - expect((caught as DOMException).name).toBe("AbortError"); - }); - - test("is deterministic for a given callId", async () => { - const a = await fetchCompactionTrailMock( - "conv-abc", - FULL_TRAIL_CALL_ID, - undefined, - ); - const b = await fetchCompactionTrailMock( - "conv-abc", - FULL_TRAIL_CALL_ID, - undefined, - ); - expect(a.events.length).toBe(b.events.length); - expect(a.events.map((e) => e.id)).toEqual(b.events.map((e) => e.id)); - }); - - test("returns an empty trail for callIds that hash to the empty bucket", async () => { - const result = await fetchCompactionTrailMock( - "conv-abc", - EMPTY_TRAIL_CALL_ID, - undefined, - ); - expect(result.events).toEqual([]); - // Empty-state UI must still receive the conversationId so it can - // surface "no compaction ran before this call" without erroring. - expect(result.conversationId).toBe("conv-abc"); - }); - - test("yields different counts for different callIds", async () => { - const full = await fetchCompactionTrailMock( - "conv-abc", - FULL_TRAIL_CALL_ID, - undefined, - ); - const empty = await fetchCompactionTrailMock( - "conv-abc", - EMPTY_TRAIL_CALL_ID, - undefined, - ); - expect(full.events.length).not.toBe(empty.events.length); - }); -}); diff --git a/apps/web/src/domains/chat/inspector/compaction-trail-mock.ts b/apps/web/src/domains/chat/inspector/compaction-trail-mock.ts deleted file mode 100644 index 40d2fba0cf9..00000000000 --- a/apps/web/src/domains/chat/inspector/compaction-trail-mock.ts +++ /dev/null @@ -1,147 +0,0 @@ -/** - * Mock compaction trail data + simulated fetch. - * - * This module exists for one reason: validate the Compaction tab UX - * before we lock the data model. Today the daemon doesn't expose a - * compaction route — when it does, the swap is one import in - * `compaction-trail-api.ts`. 
- * - * The shape returned here is the **minimal** option from - * `llm_request_logs` filtered by `call_site = "compactionAgent"`. If - * this UI feels thin during review, that's our signal that the new - * `compaction_logs` table (or a structured JSON column on - * `llm_request_logs`) earns its keep. If it feels sufficient, we ship - * the API route against the existing column and save ourselves a - * migration. - * - * **Call-scoped filtering.** The mock derives a deterministic event - * count from the callId (hashed → modulo `MOCK_EVENTS.length + 1`) - * and returns the first N events. The point: selecting different - * calls in the rail produces visibly different trails during dev, - * including the empty case. The real daemon route will scope by - * actual createdAt cutoff — this is just a stand-in so the UI - * exercise feels real. - * - * Mock latency (250ms) simulates the network so the loading state is - * visible in dev. - */ - -import type { - CompactionTrailEvent, - CompactionTrailResponse, -} from "./compaction-trail-types"; - -const MOCK_LATENCY_MS = 250; - -/** - * Fabricated events spanning ~90 minutes of a long-running conversation. - * Token counts roughly match what a real ~190k-context compaction - * produces (180k+ input shrinks to a 3-5k summary). The final event is - * an error so the UI surfaces a failure state out of the box. - */ -const MOCK_EVENTS: CompactionTrailEvent[] = [ - { - id: "compaction-mock-1", - createdAt: Date.parse("2026-05-26T15:32:11Z"), - model: "claude-sonnet-4-5", - provider: "anthropic", - inputTokens: 184_231, - outputTokens: 4_872, - durationMs: 8_412, - responsePreview: - "User is debugging a flaky CI job on the inspector test suite. Established that the flake is timing-related and only reproduces under -p1. Tried adjusting timeouts (no effect) and isolating the offending describe block (narrowed to `aggregateSkillLoads`). 
Currently investigating whether the aggregator's sort is non-stable when two loads share a timestamp.", - requestMessageCount: 142, - stopReason: "end_turn", - estimatedCostUsd: 0.62, - }, - { - id: "compaction-mock-2", - createdAt: Date.parse("2026-05-26T15:54:38Z"), - model: "claude-sonnet-4-5", - provider: "anthropic", - inputTokens: 178_904, - outputTokens: 3_211, - durationMs: 6_984, - responsePreview: - "Continuing from the previous compaction: pinned the flake to `Array.prototype.sort` stability assumptions across Bun versions. Patched the comparator to break ties on `logId` and the test now passes 100/100 runs.", - requestMessageCount: 96, - stopReason: "end_turn", - estimatedCostUsd: 0.48, - }, - { - id: "compaction-mock-3", - createdAt: Date.parse("2026-05-26T16:18:02Z"), - model: "claude-sonnet-4-5", - provider: "anthropic", - inputTokens: 191_018, - outputTokens: 5_134, - durationMs: 9_211, - responsePreview: - "Pivoted to a new task: add a Compaction Trail tab to the inspector. Reviewed existing tab structure (overview/prompt/response/raw/skills/memory). Decided on a chronological per-conversation timeline with expandable summary excerpts. Mock data is in flight to validate UX before locking the API contract.", - requestMessageCount: 168, - stopReason: "end_turn", - estimatedCostUsd: 0.71, - }, - { - id: "compaction-mock-4", - createdAt: Date.parse("2026-05-26T16:41:47Z"), - model: "claude-sonnet-4-5", - provider: "anthropic", - inputTokens: 187_452, - outputTokens: 0, - durationMs: 14_902, - responsePreview: null, - requestMessageCount: 154, - stopReason: "provider_error", - estimatedCostUsd: 0.0, - }, - { - id: "compaction-mock-5", - createdAt: Date.parse("2026-05-26T16:43:12Z"), - model: "claude-sonnet-4-5", - provider: "anthropic", - inputTokens: 187_452, - outputTokens: 4_408, - durationMs: 7_521, - responsePreview: - "Retry after provider error. Same context as the previous attempt — compactor backed off and re-issued. 
Summary captures the same conversation state and the assistant resumed from this checkpoint.", - requestMessageCount: 154, - stopReason: "end_turn", - estimatedCostUsd: 0.55, - }, -]; - -export async function fetchCompactionTrailMock( - conversationId: string, - callId: string, - signal: AbortSignal | undefined, -): Promise { - await new Promise((resolve, reject) => { - const timer = setTimeout(resolve, MOCK_LATENCY_MS); - if (signal) { - signal.addEventListener("abort", () => { - clearTimeout(timer); - reject(new DOMException("Aborted", "AbortError")); - }); - } - }); - const eventCount = mockEventCountForCallId(callId); - return { - conversationId, - events: MOCK_EVENTS.slice(0, eventCount), - }; -} - -/** - * Deterministic 0-based count derived from the callId so each call - * in the rail shows a different (but reproducible) trail length. - * Range: `[0, MOCK_EVENTS.length]` inclusive — includes the empty - * case so the empty-state UI exercises in dev too. - */ -function mockEventCountForCallId(callId: string): number { - let hash = 0; - for (let i = 0; i < callId.length; i++) { - hash = (hash * 31 + callId.charCodeAt(i)) | 0; - } - return Math.abs(hash) % (MOCK_EVENTS.length + 1); -} diff --git a/apps/web/src/domains/chat/inspector/components/tabs/compaction-tab.tsx b/apps/web/src/domains/chat/inspector/components/tabs/compaction-tab.tsx index 252382fb904..50a46286c4d 100644 --- a/apps/web/src/domains/chat/inspector/components/tabs/compaction-tab.tsx +++ b/apps/web/src/domains/chat/inspector/components/tabs/compaction-tab.tsx @@ -26,14 +26,16 @@ import type { LLMRequestLogEntry } from "@/domains/chat/types/inspector-types"; * call in the rail busts the query cache key, so the displayed trail * tracks the selection. * - * Today the data comes from `compaction-trail-mock.ts`. 
The real API - * route (planned: `GET /v1/assistants/:id/conversations/:cid/compaction?callId=…`) - * will return the same `CompactionTrailResponse` shape, projected from - * `llm_request_logs` filtered by `call_site = "compactionAgent"` and - * a createdAt cutoff at the selected call. Whether the existing column - * is sufficient or we need a richer `compaction_logs` table is the - * question this UI is here to answer — review feedback drives that - * data-model decision. + * Data source: `GET /v1/assistants/:id/conversations/:cid/compaction?callId=…`, + * projected from `llm_request_logs` rows where + * `call_site = "compactionAgent"`. The assistant resolves the trail's + * floor server-side to the most recent non-`compactionAgent` call + * before the selected one — so the events here are strictly the + * compactions that ran between the previous outbound call and this + * one, not the conversation's full compaction history. Whether + * `llm_request_logs` is sufficient or we need a richer + * `compaction_logs` table is still open — review feedback against + * this surface drives that data-model decision. */ interface CompactionTabProps { assistantId: string | undefined;