diff --git a/assistant/src/__tests__/conversation-surfaces-app-control.test.ts b/assistant/src/__tests__/conversation-surfaces-app-control.test.ts index 4530f78aec..696f3d2e3c 100644 --- a/assistant/src/__tests__/conversation-surfaces-app-control.test.ts +++ b/assistant/src/__tests__/conversation-surfaces-app-control.test.ts @@ -49,6 +49,9 @@ type SurfaceConversationContext = function buildMockContext( hostAppControlProxy?: InstanceType, conversationId = "test-session", + setHostAppControlProxy?: ( + proxy: InstanceType | undefined, + ) => void, ): SurfaceConversationContext { return { conversationId, @@ -62,6 +65,7 @@ function buildMockContext( surfaceActionRequestIds: new Set(), currentTurnSurfaces: [], hostAppControlProxy, + setHostAppControlProxy, isProcessing: () => false, enqueueMessage: () => ({ queued: false, requestId: "r1" }), getQueueDepth: () => 0, @@ -210,5 +214,103 @@ describe("surfaceProxyResolver — app-control tool routing", () => { // No envelope dispatched for the local short-circuit. expect(sentMessages).toHaveLength(0); }); + + test("clears the conversation reference via setHostAppControlProxy(undefined) when the setter is provided", async () => { + const proxy = new HostAppControlProxy("conv-1"); + + // Capture how the resolver clears the proxy reference. The setter + // mirrors Conversation.setHostAppControlProxy: dispose the existing + // proxy when transitioning to undefined. + const setterCalls: Array = []; + let attached: InstanceType | undefined = + proxy; + const setter = ( + next: InstanceType | undefined, + ) => { + setterCalls.push(next); + if (attached && attached !== next) attached.dispose(); + attached = next; + }; + + const ctx = buildMockContext(proxy, "conv-1", setter); + + const result = await surfaceProxyResolver(ctx, "app_control_stop", { + tool: "stop", + }); + + expect(result.isError).toBe(false); + // The resolver invoked the setter with undefined exactly once. + expect(setterCalls).toEqual([undefined]); + expect(attached).toBeUndefined(); + }); + }); + + // ------------------------------------------------------------------------- + // Discriminator injection (Gap A) + // ------------------------------------------------------------------------- + + describe("tool discriminator injection", () => { + test("injects `tool` derived from toolName when the agent input omits it", async () => { + const proxy = new HostAppControlProxy("conv-1"); + const ctx = buildMockContext(proxy, "conv-1"); + + // Agent inputs do not carry the discriminator — the resolver has to + // synthesize it from `toolName` ("app_control_observe" → "observe") + // before forwarding to the proxy / desktop client. + const resultPromise = surfaceProxyResolver(ctx, "app_control_observe", { + app: "com.example.editor", + }); + + expect(sentMessages).toHaveLength(1); + const sent = sentMessages[0] as Record; + expect(sent.input).toEqual({ + tool: "observe", + app: "com.example.editor", + }); + + const requestId = sent.requestId as string; + proxy.resolve(requestId, { + requestId: "ignored-by-proxy", + state: "running", + }); + await resultPromise; + + proxy.dispose(); + }); + + test('injects `tool: "start"` so the singleton-lock guard fires', async () => { + // Establish a lock owned by conv-other. + const ownerProxy = new HostAppControlProxy("conv-other"); + const ownerCtrl = new AbortController(); + const ownerPromise = ownerProxy.request( + "app_control_start", + { tool: "start", app: "com.example.editor" }, + "conv-other", + ownerCtrl.signal, + ); + const ownerSent = sentMessages[0] as Record; + ownerProxy.resolve(ownerSent.requestId as string, { + requestId: "ignored-by-proxy", + state: "running", + }); + await ownerPromise; + sentMessages.length = 0; + + // conv-1 attempts to start without a discriminator in its input. The + // resolver must inject `tool: "start"`, which causes the proxy's + // singleton-lock guard to fire and reject without dispatching. + const proxy = new HostAppControlProxy("conv-1"); + const ctx = buildMockContext(proxy, "conv-1"); + const result = await surfaceProxyResolver(ctx, "app_control_start", { + app: "com.example.editor", + }); + + expect(result.isError).toBe(true); + expect(result.content).toContain("conv-other"); + expect(sentMessages).toHaveLength(0); // No envelope dispatched. + + proxy.dispose(); + ownerProxy.dispose(); + }); }); }); diff --git a/assistant/src/__tests__/host-app-control-proxy.test.ts b/assistant/src/__tests__/host-app-control-proxy.test.ts index b8ed338f35..0165a9e8c9 100644 --- a/assistant/src/__tests__/host-app-control-proxy.test.ts +++ b/assistant/src/__tests__/host-app-control-proxy.test.ts @@ -302,9 +302,9 @@ describe("HostAppControlProxy", () => { expect(r0.content).not.toContain("WARNING"); expect(proxy.observationRepeatCount).toBe(0); - // 4 additional identical observations bring the repeat count to 4 — - // still below the threshold (5). - for (let i = 0; i < 4; i++) { + // 3 additional identical observations bring the repeat count to 3 — + // still below the threshold (4). + for (let i = 0; i < 3; i++) { const p = proxy.request( "app_control_observe", { tool: "observe", app: "com.example.editor" }, @@ -316,16 +316,16 @@ describe("HostAppControlProxy", () => { const r = await p; expect(r.content).not.toContain("WARNING"); } - expect(proxy.observationRepeatCount).toBe(4); + expect(proxy.observationRepeatCount).toBe(3); - // 5th identical observation — count reaches 5, warning fires. + // 5th total identical observation — count reaches 4, warning fires. const pFinal = proxy.request( "app_control_observe", { tool: "observe", app: "com.example.editor" }, "conv-1", ctrl.signal, ); - const sentFinal = sentMessages[5] as Record; + const sentFinal = sentMessages[4] as Record; proxy.resolve( sentFinal.requestId as string, payload({ pngBase64: PNG_A }), @@ -333,7 +333,7 @@ describe("HostAppControlProxy", () => { const rFinal = await pFinal; expect(rFinal.content).toContain("WARNING"); expect(rFinal.content.toLowerCase()).toContain("stuck"); - expect(proxy.observationRepeatCount).toBe(5); + expect(proxy.observationRepeatCount).toBe(4); proxy.dispose(); }); @@ -579,39 +579,6 @@ describe("HostAppControlProxy", () => { }); }); - // ------------------------------------------------------------------------- - // Action history bounding - // ------------------------------------------------------------------------- - - describe("action history", () => { - test("keeps only the most recent 5 entries", async () => { - const proxy = new HostAppControlProxy("conv-1"); - const ctrl = new AbortController(); - - for (let i = 0; i < 8; i++) { - const p = proxy.request( - "app_control_press", - { tool: "press", app: "com.example.editor", key: `k${i}` }, - "conv-1", - ctrl.signal, - ); - const sent = sentMessages[i] as Record; - proxy.resolve( - sent.requestId as string, - payload({ pngBase64: `P${i}` }), - ); - await p; - } - - expect(proxy.actionHistory).toHaveLength(5); - // Oldest retained entry should fingerprint key "k3". - expect(proxy.actionHistory[0]).toContain('"key":"k3"'); - expect(proxy.actionHistory[4]).toContain('"key":"k7"'); - - proxy.dispose(); - }); - }); - // ------------------------------------------------------------------------- // isAvailable // ------------------------------------------------------------------------- diff --git a/assistant/src/daemon/conversation-surfaces.ts b/assistant/src/daemon/conversation-surfaces.ts index e566aa5046..6df0eaa781 100644 --- a/assistant/src/daemon/conversation-surfaces.ts +++ b/assistant/src/daemon/conversation-surfaces.ts @@ -324,6 +324,13 @@ export interface SurfaceConversationContext { hostCuProxy?: HostCuProxy; /** Optional proxy for delegating per-app app-control actions to a connected desktop client. */ hostAppControlProxy?: HostAppControlProxy; + /** + * Setter that lets the resolver detach the conversation's app-control proxy + * after `app_control_stop`. Disposes the existing proxy when transitioning + * to undefined so subsequent tool calls cleanly fail with "unavailable" + * rather than dispatching to a torn-down proxy. + */ + setHostAppControlProxy?(proxy: HostAppControlProxy | undefined): void; /** True when no interactive client is connected (headless / channel-only). */ readonly hasNoClient?: boolean; isProcessing(): boolean; @@ -1796,15 +1803,37 @@ export async function surfaceProxyResolver( // `app_control_stop` resolves immediately: tear down the proxy without // a client round-trip. Mirrors CU's terminal-tool short-circuit - // (`computer_use_done` / `computer_use_respond`). + // (`computer_use_done` / `computer_use_respond`). Clear the + // conversation's reference (setter disposes the existing proxy) so a + // later `app_control_observe`/etc. cleanly fails with "unavailable" + // instead of dispatching against a torn-down proxy, and so a sibling + // conversation can acquire the released singleton lock without the + // disposed proxy still being addressable. if (toolName === "app_control_stop") { - ctx.hostAppControlProxy.dispose(); + if (ctx.setHostAppControlProxy) { + ctx.setHostAppControlProxy(undefined); + } else { + ctx.hostAppControlProxy.dispose(); + } return { content: "App control stopped.", isError: false }; } + // The TS `HostAppControlInput` (and the Swift mirror) is a discriminated + // union on `tool` ("start" | "observe" | "press" | …). The agent's raw + // tool input only carries the action-specific payload (app, x/y, text, + // …) — the discriminator is implied by `toolName` (`app_control_`). + // Inject it here so the proxy's singleton-lock guard (`input.tool === + // "start"`) and the Swift client's discriminated-union decoder both see + // the field they require. + const tool = toolName.slice("app_control_".length); + const inputWithTool = { + ...input, + tool, + } as unknown as HostAppControlInput; + return ctx.hostAppControlProxy.request( toolName, - input as unknown as HostAppControlInput, + inputWithTool, ctx.conversationId, signal ?? new AbortController().signal, ); diff --git a/assistant/src/daemon/host-app-control-proxy.ts b/assistant/src/daemon/host-app-control-proxy.ts index 74e4fc3bbe..ac80cae4d1 100644 --- a/assistant/src/daemon/host-app-control-proxy.ts +++ b/assistant/src/daemon/host-app-control-proxy.ts @@ -8,8 +8,8 @@ * * Lifecycle (pending map, timeout, abort SSE, dispose, isAvailable) lives * in {@link HostProxyBase}; this class layers app-control-specific state - * (active app, PNG-hash loop guard, action history) and the result-payload - * → ToolExecutionResult translation on top. + * (active app, PNG-hash loop guard) and the result-payload → + * ToolExecutionResult translation on top. * * **Singleton lock.** Only one conversation may hold an active app-control * session at a time. The lock is module-level (`activeAppControlConversationId`) @@ -44,8 +44,11 @@ const log = getLogger("host-app-control-proxy"); // --------------------------------------------------------------------------- const REQUEST_TIMEOUT_MS = 60 * 1000; -const ACTION_HISTORY_LIMIT = 5; -const STUCK_REPEAT_THRESHOLD = 5; +// Threshold of 4 means the warning fires on the 5th identical observation: +// the first observation establishes the baseline (count = 0), each +// subsequent identical observation increments the counter, so count = 4 is +// reached on the 5th total observation. +const STUCK_REPEAT_THRESHOLD = 4; // --------------------------------------------------------------------------- // Tool name constants @@ -114,9 +117,6 @@ export class HostAppControlProxy extends HostProxyBase< */ private observationHashRepeatCount = 0; - /** Ring buffer of the last {@link ACTION_HISTORY_LIMIT} tool+input fingerprints (FIFO). */ - private _actionHistory: string[] = []; - constructor(conversationId: string) { super({ capabilityName: "host_app_control", @@ -141,10 +141,6 @@ export class HostAppControlProxy extends HostProxyBase< return this.observationHashRepeatCount; } - get actionHistory(): readonly string[] { - return this._actionHistory; - } - // --------------------------------------------------------------------------- // Public request entry point // --------------------------------------------------------------------------- @@ -181,10 +177,6 @@ export class HostAppControlProxy extends HostProxyBase< } } - // Record the action fingerprint up-front so it shows up in history - // even if the request times out / aborts. - this.recordActionFingerprint(toolName, input); - try { const payload = await this.dispatchRequest( toolName, @@ -311,18 +303,6 @@ export class HostAppControlProxy extends HostProxyBase< }; } - /** Append `:` to the bounded action history. */ - private recordActionFingerprint( - toolName: string, - input: HostAppControlInput, - ): void { - const fingerprint = `${toolName}:${JSON.stringify(input)}`; - this._actionHistory.push(fingerprint); - if (this._actionHistory.length > ACTION_HISTORY_LIMIT) { - this._actionHistory = this._actionHistory.slice(-ACTION_HISTORY_LIMIT); - } - } - // --------------------------------------------------------------------------- // Lifecycle // ---------------------------------------------------------------------------