Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions assistant/src/__tests__/conversation-surfaces-app-control.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ type SurfaceConversationContext =
function buildMockContext(
hostAppControlProxy?: InstanceType<typeof HostAppControlProxy>,
conversationId = "test-session",
setHostAppControlProxy?: (
proxy: InstanceType<typeof HostAppControlProxy> | undefined,
) => void,
): SurfaceConversationContext {
return {
conversationId,
Expand All @@ -62,6 +65,7 @@ function buildMockContext(
surfaceActionRequestIds: new Set(),
currentTurnSurfaces: [],
hostAppControlProxy,
setHostAppControlProxy,
isProcessing: () => false,
enqueueMessage: () => ({ queued: false, requestId: "r1" }),
getQueueDepth: () => 0,
Expand Down Expand Up @@ -210,5 +214,103 @@ describe("surfaceProxyResolver — app-control tool routing", () => {
// No envelope dispatched for the local short-circuit.
expect(sentMessages).toHaveLength(0);
});

test("clears the conversation reference via setHostAppControlProxy(undefined) when the setter is provided", async () => {
const proxy = new HostAppControlProxy("conv-1");

// Capture how the resolver clears the proxy reference. The setter
// mirrors Conversation.setHostAppControlProxy: dispose the existing
// proxy when transitioning to undefined.
const setterCalls: Array<unknown> = [];
let attached: InstanceType<typeof HostAppControlProxy> | undefined =
proxy;
const setter = (
next: InstanceType<typeof HostAppControlProxy> | undefined,
) => {
setterCalls.push(next);
if (attached && attached !== next) attached.dispose();
attached = next;
};

const ctx = buildMockContext(proxy, "conv-1", setter);

const result = await surfaceProxyResolver(ctx, "app_control_stop", {
tool: "stop",
});

expect(result.isError).toBe(false);
// The resolver invoked the setter with undefined exactly once.
expect(setterCalls).toEqual([undefined]);
expect(attached).toBeUndefined();
});
});

// -------------------------------------------------------------------------
// Discriminator injection (Gap A)
// -------------------------------------------------------------------------

describe("tool discriminator injection", () => {
test("injects `tool` derived from toolName when the agent input omits it", async () => {
const proxy = new HostAppControlProxy("conv-1");
const ctx = buildMockContext(proxy, "conv-1");

// Agent inputs do not carry the discriminator — the resolver has to
// synthesize it from `toolName` ("app_control_observe" → "observe")
// before forwarding to the proxy / desktop client.
const resultPromise = surfaceProxyResolver(ctx, "app_control_observe", {
app: "com.example.editor",
});

expect(sentMessages).toHaveLength(1);
const sent = sentMessages[0] as Record<string, unknown>;
expect(sent.input).toEqual({
tool: "observe",
app: "com.example.editor",
});

const requestId = sent.requestId as string;
proxy.resolve(requestId, {
requestId: "ignored-by-proxy",
state: "running",
});
await resultPromise;

proxy.dispose();
});

test('injects `tool: "start"` so the singleton-lock guard fires', async () => {
// Establish a lock owned by conv-other.
const ownerProxy = new HostAppControlProxy("conv-other");
const ownerCtrl = new AbortController();
const ownerPromise = ownerProxy.request(
"app_control_start",
{ tool: "start", app: "com.example.editor" },
"conv-other",
ownerCtrl.signal,
);
const ownerSent = sentMessages[0] as Record<string, unknown>;
ownerProxy.resolve(ownerSent.requestId as string, {
requestId: "ignored-by-proxy",
state: "running",
});
await ownerPromise;
sentMessages.length = 0;

// conv-1 attempts to start without a discriminator in its input. The
// resolver must inject `tool: "start"`, which causes the proxy's
// singleton-lock guard to fire and reject without dispatching.
const proxy = new HostAppControlProxy("conv-1");
const ctx = buildMockContext(proxy, "conv-1");
const result = await surfaceProxyResolver(ctx, "app_control_start", {
app: "com.example.editor",
});

expect(result.isError).toBe(true);
expect(result.content).toContain("conv-other");
expect(sentMessages).toHaveLength(0); // No envelope dispatched.

proxy.dispose();
ownerProxy.dispose();
});
});
});
47 changes: 7 additions & 40 deletions assistant/src/__tests__/host-app-control-proxy.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -302,9 +302,9 @@ describe("HostAppControlProxy", () => {
expect(r0.content).not.toContain("WARNING");
expect(proxy.observationRepeatCount).toBe(0);

// 4 additional identical observations bring the repeat count to 4
// still below the threshold (5).
for (let i = 0; i < 4; i++) {
// 3 additional identical observations bring the repeat count to 3
// still below the threshold (4).
for (let i = 0; i < 3; i++) {
const p = proxy.request(
"app_control_observe",
{ tool: "observe", app: "com.example.editor" },
Expand All @@ -316,24 +316,24 @@ describe("HostAppControlProxy", () => {
const r = await p;
expect(r.content).not.toContain("WARNING");
}
expect(proxy.observationRepeatCount).toBe(4);
expect(proxy.observationRepeatCount).toBe(3);

// 5th identical observation — count reaches 5, warning fires.
// 5th total identical observation — count reaches 4, warning fires.
const pFinal = proxy.request(
"app_control_observe",
{ tool: "observe", app: "com.example.editor" },
"conv-1",
ctrl.signal,
);
const sentFinal = sentMessages[5] as Record<string, unknown>;
const sentFinal = sentMessages[4] as Record<string, unknown>;
proxy.resolve(
sentFinal.requestId as string,
payload({ pngBase64: PNG_A }),
);
const rFinal = await pFinal;
expect(rFinal.content).toContain("WARNING");
expect(rFinal.content.toLowerCase()).toContain("stuck");
expect(proxy.observationRepeatCount).toBe(5);
expect(proxy.observationRepeatCount).toBe(4);

proxy.dispose();
});
Expand Down Expand Up @@ -579,39 +579,6 @@ describe("HostAppControlProxy", () => {
});
});

// -------------------------------------------------------------------------
// Action history bounding
// -------------------------------------------------------------------------

describe("action history", () => {
test("keeps only the most recent 5 entries", async () => {
const proxy = new HostAppControlProxy("conv-1");
const ctrl = new AbortController();

for (let i = 0; i < 8; i++) {
const p = proxy.request(
"app_control_press",
{ tool: "press", app: "com.example.editor", key: `k${i}` },
"conv-1",
ctrl.signal,
);
const sent = sentMessages[i] as Record<string, unknown>;
proxy.resolve(
sent.requestId as string,
payload({ pngBase64: `P${i}` }),
);
await p;
}

expect(proxy.actionHistory).toHaveLength(5);
// Oldest retained entry should fingerprint key "k3".
expect(proxy.actionHistory[0]).toContain('"key":"k3"');
expect(proxy.actionHistory[4]).toContain('"key":"k7"');

proxy.dispose();
});
});

// -------------------------------------------------------------------------
// isAvailable
// -------------------------------------------------------------------------
Expand Down
35 changes: 32 additions & 3 deletions assistant/src/daemon/conversation-surfaces.ts
Comment thread
siddseethepalli marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,13 @@ export interface SurfaceConversationContext {
hostCuProxy?: HostCuProxy;
/** Optional proxy for delegating per-app app-control actions to a connected desktop client. */
hostAppControlProxy?: HostAppControlProxy;
/**
* Setter that lets the resolver detach the conversation's app-control proxy
* after `app_control_stop`. Disposes the existing proxy when transitioning
* to undefined so subsequent tool calls cleanly fail with "unavailable"
* rather than dispatching to a torn-down proxy.
*/
setHostAppControlProxy?(proxy: HostAppControlProxy | undefined): void;
/** True when no interactive client is connected (headless / channel-only). */
readonly hasNoClient?: boolean;
isProcessing(): boolean;
Expand Down Expand Up @@ -1796,15 +1803,37 @@ export async function surfaceProxyResolver(

// `app_control_stop` resolves immediately: tear down the proxy without
// a client round-trip. Mirrors CU's terminal-tool short-circuit
// (`computer_use_done` / `computer_use_respond`).
// (`computer_use_done` / `computer_use_respond`). Clear the
// conversation's reference (setter disposes the existing proxy) so a
// later `app_control_observe`/etc. cleanly fails with "unavailable"
// instead of dispatching against a torn-down proxy, and so a sibling
// conversation can acquire the released singleton lock without the
// disposed proxy still being addressable.
if (toolName === "app_control_stop") {
ctx.hostAppControlProxy.dispose();
if (ctx.setHostAppControlProxy) {
ctx.setHostAppControlProxy(undefined);
} else {
ctx.hostAppControlProxy.dispose();
}
return { content: "App control stopped.", isError: false };
}

// The TS `HostAppControlInput` (and the Swift mirror) is a discriminated
// union on `tool` ("start" | "observe" | "press" | …). The agent's raw
// tool input only carries the action-specific payload (app, x/y, text,
// …) — the discriminator is implied by `toolName` (`app_control_<tool>`).
// Inject it here so the proxy's singleton-lock guard (`input.tool ===
// "start"`) and the Swift client's discriminated-union decoder both see
// the field they require.
const tool = toolName.slice("app_control_".length);
const inputWithTool = {
...input,
tool,
} as unknown as HostAppControlInput;

return ctx.hostAppControlProxy.request(
toolName,
input as unknown as HostAppControlInput,
inputWithTool,
ctx.conversationId,
signal ?? new AbortController().signal,
);
Expand Down
34 changes: 7 additions & 27 deletions assistant/src/daemon/host-app-control-proxy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
*
* Lifecycle (pending map, timeout, abort SSE, dispose, isAvailable) lives
* in {@link HostProxyBase}; this class layers app-control-specific state
* (active app, PNG-hash loop guard, action history) and the result-payload
* ToolExecutionResult translation on top.
* (active app, PNG-hash loop guard) and the result-payload
* ToolExecutionResult translation on top.
*
* **Singleton lock.** Only one conversation may hold an active app-control
* session at a time. The lock is module-level (`activeAppControlConversationId`)
Expand Down Expand Up @@ -44,8 +44,11 @@ const log = getLogger("host-app-control-proxy");
// ---------------------------------------------------------------------------

const REQUEST_TIMEOUT_MS = 60 * 1000;
const ACTION_HISTORY_LIMIT = 5;
const STUCK_REPEAT_THRESHOLD = 5;
// Threshold of 4 means the warning fires on the 5th identical observation:
// the first observation establishes the baseline (count = 0), each
// subsequent identical observation increments the counter, so count = 4 is
// reached on the 5th total observation.
const STUCK_REPEAT_THRESHOLD = 4;

// ---------------------------------------------------------------------------
// Tool name constants
Expand Down Expand Up @@ -114,9 +117,6 @@ export class HostAppControlProxy extends HostProxyBase<
*/
private observationHashRepeatCount = 0;

/** Ring buffer of the last {@link ACTION_HISTORY_LIMIT} tool+input fingerprints (FIFO). */
private _actionHistory: string[] = [];

constructor(conversationId: string) {
super({
capabilityName: "host_app_control",
Expand All @@ -141,10 +141,6 @@ export class HostAppControlProxy extends HostProxyBase<
return this.observationHashRepeatCount;
}

get actionHistory(): readonly string[] {
return this._actionHistory;
}

// ---------------------------------------------------------------------------
// Public request entry point
// ---------------------------------------------------------------------------
Expand Down Expand Up @@ -181,10 +177,6 @@ export class HostAppControlProxy extends HostProxyBase<
}
}

// Record the action fingerprint up-front so it shows up in history
// even if the request times out / aborts.
this.recordActionFingerprint(toolName, input);

try {
const payload = await this.dispatchRequest(
toolName,
Expand Down Expand Up @@ -311,18 +303,6 @@ export class HostAppControlProxy extends HostProxyBase<
};
}

/** Append `<toolName>:<JSON(input)>` to the bounded action history. */
private recordActionFingerprint(
toolName: string,
input: HostAppControlInput,
): void {
const fingerprint = `${toolName}:${JSON.stringify(input)}`;
this._actionHistory.push(fingerprint);
if (this._actionHistory.length > ACTION_HISTORY_LIMIT) {
this._actionHistory = this._actionHistory.slice(-ACTION_HISTORY_LIMIT);
}
}

// ---------------------------------------------------------------------------
// Lifecycle
// ---------------------------------------------------------------------------
Expand Down
Loading