Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 33 additions & 45 deletions .cursor/rules/sdk/docs/request-lifecycle-system.mdc

Large diffs are not rendered by default.

101 changes: 86 additions & 15 deletions .cursor/rules/sdk/request-lifecycle-primitives.mdc

Large diffs are not rendered by default.

30 changes: 30 additions & 0 deletions packages/sdk/client/api/client-request-id.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/**
* UUIDv4 generator for client-side request ids. The Web Crypto API
* ships `crypto.randomUUID` everywhere we run today (Bun, modern Node,
* modern browsers, React Native via the polyfill that the workbench-
* desktop / RN runtime config injects). The fallback exists so the SDK
* never crashes in an exotic JS environment without
* `crypto.randomUUID` β€” `requestId` semantics still hold (uniqueness,
* opaque to the caller), just without the UUIDv4 wire shape.
*
* Shared with the long-running call sites that decorate their promises
* with `requestId`: `completion(...)`, `loadModel(...)`,
* `downloadAsset(...)`. The decorated promise's `op.requestId` and the
* server's registry entry's `requestId` must match β€” generating once
* here keeps that invariant.
*/
export function generateClientRequestId(): string {
const c = (
globalThis as {
crypto?: { randomUUID?: () => string };
}
).crypto;
if (c?.randomUUID) return c.randomUUID();
// Fallback: 128 random bits encoded as a hex string. Distinct enough
// for in-flight cancel targeting; not a wire-spec UUID.
const bytes = new Uint8Array(16);
for (let i = 0; i < bytes.length; i++) {
bytes[i] = Math.floor(Math.random() * 256);
}
return Array.from(bytes, (b) => b.toString(16).padStart(2, "0")).join("");
}
25 changes: 1 addition & 24 deletions packages/sdk/client/api/completion-stream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import {
type ToolInput,
} from "@/utils/tool-helpers";
import { buildFinalFromEvents } from "@/utils/aggregate-events";
import { generateClientRequestId } from "@/client/api/client-request-id";

const logger = getClientLogger();

Expand Down Expand Up @@ -409,27 +410,3 @@ export function completion(params: CompletionParams): CompletionRun {
}
}

/**
* UUIDv4 generator for client-side request ids. The Web Crypto API ships
* `crypto.randomUUID` everywhere we run today (Bun, modern Node, modern
* browsers, React Native via the polyfill that the workbench-desktop /
* RN runtime config injects). The fallback exists so the SDK never
* crashes in an exotic JS environment without `crypto.randomUUID` β€”
* `requestId` semantics still hold (uniqueness, opaque to the caller),
* just without the UUIDv4 wire shape.
*/
function generateClientRequestId(): string {
const c = (
globalThis as {
crypto?: { randomUUID?: () => string };
}
).crypto;
if (c?.randomUUID) return c.randomUUID();
// Fallback: 128 random bits encoded as a hex string. Distinct enough
// for in-flight cancel targeting; not a wire-spec UUID.
const bytes = new Uint8Array(16);
for (let i = 0; i < bytes.length; i++) {
bytes[i] = Math.floor(Math.random() * 256);
}
return Array.from(bytes, (b) => b.toString(16).padStart(2, "0")).join("");
}
29 changes: 26 additions & 3 deletions packages/sdk/client/api/download-asset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import {
StreamEndedError,
InvalidResponseError,
} from "@/utils/errors-client";
import { decoratePromise } from "@/utils/decorate-promise";
import { generateClientRequestId } from "@/client/api/client-request-id";

export type DownloadAssetOptions = BaseDownloadAssetOptions;

Expand All @@ -25,7 +27,9 @@ export type DownloadAssetOptions = BaseDownloadAssetOptions;
* - onProgress: Optional callback for download progress
* @param rpcOptions - Optional RPC options including per-call profiling configuration
*
* @returns Promise that resolves to the asset ID (either the provided assetSrc or a generated ID)
* @returns Promise that resolves to the asset ID (either the provided assetSrc or a generated ID),
* decorated with a synchronous `requestId` field for use with
* `cancel({ requestId: op.requestId })` before the promise resolves.
*
* @throws {QvacErrorBase} When asset download fails, with details in the error message
* @throws {QvacErrorBase} When streaming ends unexpectedly (only when using onProgress)
Expand All @@ -46,13 +50,32 @@ export type DownloadAssetOptions = BaseDownloadAssetOptions;
* console.log(`Downloaded: ${progress.percentage}%`);
* }
* });
*
* // Targeted cancel by requestId β€” grab the id synchronously, then
* // cancel before the download resolves.
* const op = downloadAsset({ assetSrc: "https://example.com/large.gguf" });
* setTimeout(() => cancel({ requestId: op.requestId }), 1000);
* await op; // rejects with `InferenceCancelledError`
* ```
*/
export async function downloadAsset(
export function downloadAsset(
options: DownloadAssetOptions,
rpcOptions?: RPCOptions,
): Promise<string> & { requestId: string } {
const requestId = generateClientRequestId();
const inner = runDownloadAsset(options, requestId, rpcOptions);
return decoratePromise(inner, { requestId });
}

async function runDownloadAsset(
options: DownloadAssetOptions,
requestId: string,
rpcOptions?: RPCOptions,
): Promise<string> {
const request = downloadAssetOptionsToRequestSchema.parse(options);
const request = downloadAssetOptionsToRequestSchema.parse({
...options,
requestId,
});

if (options.onProgress) {
// Use streaming for progress updates
Expand Down
44 changes: 39 additions & 5 deletions packages/sdk/client/api/load-model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import {
} from "@/utils/errors-client";
import { assertModelSrcMatchesModelType } from "@/utils/load-model-validation";
import { getClientLogger } from "@/logging";
import { decoratePromise } from "@/utils/decorate-promise";
import { generateClientRequestId } from "@/client/api/client-request-id";

const logger = getClientLogger();

Expand All @@ -46,7 +48,7 @@ const logger = getClientLogger();
export function loadModel<S extends ModelDescriptor>(
options: LoadModelDescriptorParam<S>,
rpcOptions?: RPCOptions,
): Promise<string>;
): Promise<string> & { requestId: string };

/**
* Loads a machine learning model from a local path, remote URL, or Hyperdrive key.
Expand Down Expand Up @@ -143,7 +145,7 @@ export function loadModel<S extends ModelDescriptor>(
export function loadModel(
options: LoadModelOptions,
rpcOptions?: RPCOptions,
): Promise<string>;
): Promise<string> & { requestId: string };

/**
* Loads a custom plugin model (any non-built-in `modelType` string).
Expand All @@ -159,7 +161,7 @@ export function loadModel(
export function loadModel<T extends string>(
options: LoadCustomPluginModelOptions<T>,
rpcOptions?: RPCOptions,
): Promise<string>;
): Promise<string> & { requestId: string };

/**
* Hot-reloads configuration on an already loaded model.
Expand Down Expand Up @@ -196,14 +198,39 @@ export function loadModel<T extends string>(
export function loadModel(
options: ReloadConfigOptions,
rpcOptions?: RPCOptions,
): Promise<string>;
): Promise<string> & { requestId: string };

export async function loadModel(
export function loadModel(
options:
| LoadModelOptions
| LoadCustomPluginModelOptions<string>
| LoadModelDescriptorOnlyOptions
| ReloadConfigOptions,
rpcOptions?: RPCOptions,
): Promise<string> & { requestId: string } {
// Generate a stable `requestId` once, synchronously, before kicking
// off any async work. The same id is:
// - threaded onto the request envelope (`request.requestId`) so the
// server's `registry.begin(...)` records it on the registry
// entry; and
// - attached to the returned promise via `decoratePromise` so the
// caller can target this exact call with `cancel({ requestId })`
// before `await` resolves. Generating client-side and surfacing
// it synchronously is what closes the "stop-button race" gap for
// `loadModel` / `downloadAsset` callers β€” same shape as the
// `CompletionRun.requestId` contract.
const requestId = generateClientRequestId();
const inner = runLoadModel(options, requestId, rpcOptions);
return decoratePromise(inner, { requestId });
}

async function runLoadModel(
options:
| LoadModelOptions
| LoadCustomPluginModelOptions<string>
| LoadModelDescriptorOnlyOptions
| ReloadConfigOptions,
requestId: string,
rpcOptions?: RPCOptions,
): Promise<string> {
const isReloadConfig = "modelId" in options && !("modelSrc" in options);
Expand Down Expand Up @@ -232,6 +259,13 @@ export async function loadModel(
}
}

// Splice the client-generated `requestId` onto the resolved options so
// the wire envelope carries it. The server uses the same value as the
// registry-entry key β€” that match is what makes
// `cancel({ requestId: op.requestId })` a no-op when no match exists
// and a precise abort when it does.
resolvedOptions = { ...resolvedOptions, requestId };

const request = isReloadConfig
? reloadConfigOptionsToRequestSchema.parse(resolvedOptions)
: loadModelOptionsToRequestSchema.parse(resolvedOptions);
Expand Down
30 changes: 24 additions & 6 deletions packages/sdk/schemas/download-asset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,38 @@ export const downloadAssetOptionsToRequestSchema =
.extend({
onProgress: z.unknown().optional(),
withProgress: z.boolean().optional(),
requestId: z.string().min(1).optional(),
})
.transform((data) => ({
type: "downloadAsset" as const,
assetSrc: modelInputToSrcSchema.parse(data.assetSrc),
withProgress: data.withProgress ?? !!data.onProgress,
seed: data.seed ?? false,
}));
.transform((data) => {
const out: {
type: "downloadAsset";
assetSrc: string;
withProgress: boolean;
seed: boolean;
requestId?: string;
} = {
type: "downloadAsset" as const,
assetSrc: modelInputToSrcSchema.parse(data.assetSrc),
withProgress: data.withProgress ?? !!data.onProgress,
seed: data.seed ?? false,
};
if (data.requestId !== undefined) out.requestId = data.requestId;
return out;
});

export const downloadAssetRequestSchema = z
.object({
type: z.literal("downloadAsset"),
assetSrc: z.string(),
withProgress: z.boolean().optional(),
seed: z.boolean().optional(),
requestId: z
.string()
.min(1)
.optional()
.describe(
"Stable identifier for this in-flight download, generated by the client at call time. Optional on the wire so legacy clients keep working β€” the server falls back to a server-generated id when the field is missing. Exposed on the client-side decorated promise so callers can target this download with `cancel({ requestId })`.",
),
})
.transform((data) => ({
...data,
Expand Down
17 changes: 17 additions & 0 deletions packages/sdk/schemas/load-model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ const loadModelRequestCommonFields = {
onProgress: z.unknown().optional(),
logger: z.unknown().optional(),
withProgress: z.boolean().optional(),
requestId: z.string().min(1).optional(),
};

export const loadBuiltinModelOptionsBaseSchema = z.union([
Expand Down Expand Up @@ -153,6 +154,7 @@ const loadModelOptionsToRequestBaseSchema = z.union([
seed: data.seed ?? false,
withProgress: data.withProgress ?? !!data.onProgress,
delegate: data.delegate,
...(data.requestId !== undefined && { requestId: data.requestId }),
})),
z
.object({
Expand All @@ -170,6 +172,7 @@ const loadModelOptionsToRequestBaseSchema = z.union([
seed: data.seed ?? false,
withProgress: data.withProgress ?? !!data.onProgress,
delegate: data.delegate,
...(data.requestId !== undefined && { requestId: data.requestId }),
})),
z
.object({
Expand All @@ -187,6 +190,7 @@ const loadModelOptionsToRequestBaseSchema = z.union([
seed: data.seed ?? false,
withProgress: data.withProgress ?? !!data.onProgress,
delegate: data.delegate,
...(data.requestId !== undefined && { requestId: data.requestId }),
})),
z
.object({
Expand All @@ -204,6 +208,7 @@ const loadModelOptionsToRequestBaseSchema = z.union([
seed: data.seed ?? false,
withProgress: data.withProgress ?? !!data.onProgress,
delegate: data.delegate,
...(data.requestId !== undefined && { requestId: data.requestId }),
})),
z
.object({
Expand Down Expand Up @@ -232,6 +237,7 @@ const loadModelOptionsToRequestBaseSchema = z.union([
seed: data.seed ?? false,
withProgress: data.withProgress ?? !!data.onProgress,
delegate: data.delegate,
...(data.requestId !== undefined && { requestId: data.requestId }),
})),
z
.object({
Expand All @@ -249,6 +255,7 @@ const loadModelOptionsToRequestBaseSchema = z.union([
seed: data.seed ?? false,
withProgress: data.withProgress ?? !!data.onProgress,
delegate: data.delegate,
...(data.requestId !== undefined && { requestId: data.requestId }),
})),
z
.object({
Expand All @@ -266,6 +273,7 @@ const loadModelOptionsToRequestBaseSchema = z.union([
seed: data.seed ?? false,
withProgress: data.withProgress ?? !!data.onProgress,
delegate: data.delegate,
...(data.requestId !== undefined && { requestId: data.requestId }),
})),
z
.object({
Expand All @@ -283,6 +291,7 @@ const loadModelOptionsToRequestBaseSchema = z.union([
seed: data.seed ?? false,
withProgress: data.withProgress ?? !!data.onProgress,
delegate: data.delegate,
...(data.requestId !== undefined && { requestId: data.requestId }),
})),
z
.object({
Expand All @@ -301,6 +310,7 @@ const loadModelOptionsToRequestBaseSchema = z.union([
seed: data.seed ?? false,
withProgress: data.withProgress ?? !!data.onProgress,
delegate: data.delegate,
...(data.requestId !== undefined && { requestId: data.requestId }),
})),
]);

Expand All @@ -314,6 +324,13 @@ const commonModelConfigSchema = z.object({
withProgress: z.boolean().optional(),
seed: z.boolean().optional(),
delegate: delegateSchema,
requestId: z
.string()
.min(1)
.optional()
.describe(
"Stable identifier for this in-flight load, generated by the client at call time. Optional on the wire so legacy clients keep working β€” the server falls back to a server-generated id when the field is missing. Exposed on the client-side decorated promise so callers can target this load with `cancel({ requestId })`.",
),
});

// Request schemas for each model type (use canonical types since transforms normalize)
Expand Down
27 changes: 18 additions & 9 deletions packages/sdk/schemas/rag.ts
Original file line number Diff line number Diff line change
Expand Up @@ -183,16 +183,25 @@ const ragDeleteWorkspaceOperationSchema = ragDeleteWorkspaceParamsSchema.extend(

// ============== Unified Request Schema ==============

// `requestId` is threaded onto every RAG operation as an optional field so
// the request registry can correlate the in-flight context with the
// client-side decorated-promise (`op.requestId`). Optional on the wire
// so legacy clients keep working β€” the server falls back to a
// server-generated id when the field is missing.
const ragRequestIdField = {
requestId: z.string().min(1).optional(),
};

export const ragRequestSchema = z.discriminatedUnion("operation", [
ragChunkOperationSchema,
ragIngestOperationSchema,
ragSaveEmbeddingsOperationSchema,
ragSearchOperationSchema,
ragDeleteEmbeddingsOperationSchema,
ragReindexOperationSchema,
ragListWorkspacesOperationSchema,
ragCloseWorkspaceOperationSchema,
ragDeleteWorkspaceOperationSchema,
ragChunkOperationSchema.extend(ragRequestIdField),
ragIngestOperationSchema.extend(ragRequestIdField),
ragSaveEmbeddingsOperationSchema.extend(ragRequestIdField),
ragSearchOperationSchema.extend(ragRequestIdField),
ragDeleteEmbeddingsOperationSchema.extend(ragRequestIdField),
ragReindexOperationSchema.extend(ragRequestIdField),
ragListWorkspacesOperationSchema.extend(ragRequestIdField),
ragCloseWorkspaceOperationSchema.extend(ragRequestIdField),
ragDeleteWorkspaceOperationSchema.extend(ragRequestIdField),
]);

// ============== Response Schemas ==============
Expand Down
7 changes: 4 additions & 3 deletions packages/sdk/server/bare/rag-hyperdb/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ export {
type RagWorkspaceInfo,
} from "@/server/bare/rag-hyperdb/rag-workspace-manager";
export {
registerRagOperation,
unregisterRagOperation,
cancelRagOperation,
getActiveRagRequest,
setActiveRagRequest,
clearActiveRagRequest,
getWorkspaceKey,
} from "@/server/bare/rag-hyperdb/rag-operation-manager";
Loading
Loading