Skip to content

Commit 35a1a09

Browse files
committed
expose tool_behavior and tool_response_scheduling like in the Python lib
livekit/agents#3482
1 parent 7fc7808 commit 35a1a09

File tree

3 files changed

+82
-35
lines changed

3 files changed

+82
-35
lines changed

plugins/google/src/beta/realtime/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
//
33
// SPDX-License-Identifier: Apache-2.0
44
export type { ClientEvents, LiveAPIModels, Voice } from './api_proto.js';
5-
export { RealtimeModel } from './realtime_api.js';
5+
export { Behavior, FunctionResponseScheduling, RealtimeModel } from './realtime_api.js';

plugins/google/src/beta/realtime/realtime_api.ts

Lines changed: 68 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,33 @@
11
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
22
//
33
// SPDX-License-Identifier: Apache-2.0
4-
import type { Session } from '@google/genai';
5-
import * as types from '@google/genai';
64
import {
5+
type FunctionDeclaration,
6+
type FunctionResponse,
7+
type GoogleGenAIOptions,
8+
type LiveClientToolResponse,
9+
type Session,
710
ActivityHandling,
811
type AudioTranscriptionConfig,
12+
Behavior,
913
type ContextWindowCompressionConfig,
14+
FunctionResponseScheduling,
1015
GoogleGenAI,
1116
type HttpOptions,
12-
Modality,
1317
type RealtimeInputConfig,
14-
} from '@google/genai';
18+
type LiveServerMessage,
19+
type LiveConnectConfig,
20+
type LiveServerContent,
21+
type LiveServerToolCall,
22+
type LiveServerToolCallCancellation,
23+
type UsageMetadata,
24+
type ModalityTokenCount,
25+
MediaModality,
26+
type LiveServerGoAway,
27+
Modality,
28+
type Content,
29+
type LiveClientRealtimeInput
30+
} from "@google/genai";
1531
import type { APIConnectOptions } from '@livekit/agents';
1632
import {
1733
APIConnectionError,
@@ -35,6 +51,8 @@ import { toFunctionDeclarations } from '../../utils.js';
3551
import type * as api_proto from './api_proto.js';
3652
import type { LiveAPIModels, Voice } from './api_proto.js';
3753

54+
export { Behavior, FunctionResponseScheduling, Modality };
55+
3856
// Input audio constants (matching Python)
3957
const INPUT_AUDIO_SAMPLE_RATE = 16000;
4058
const INPUT_AUDIO_CHANNELS = 1;
@@ -102,6 +120,8 @@ interface RealtimeOptions {
102120
contextWindowCompression?: ContextWindowCompressionConfig;
103121
apiVersion?: string;
104122
geminiTools?: LLMTools;
123+
toolBehavior?: Behavior;
124+
toolResponseScheduling?: FunctionResponseScheduling;
105125
}
106126

107127
/**
@@ -273,6 +293,18 @@ export class RealtimeModel extends llm.RealtimeModel {
273293
* Gemini-specific tools to use for the session
274294
*/
275295
geminiTools?: LLMTools;
296+
297+
/**
298+
* Tool behavior for function calls (BLOCKING or NON_BLOCKING)
299+
* Defaults to NON_BLOCKING to prevent generateReply timeouts
300+
*/
301+
toolBehavior?: Behavior;
302+
303+
/**
304+
* Function response scheduling (SILENT, WHEN_IDLE, or INTERRUPT)
305+
* Defaults to WHEN_IDLE
306+
*/
307+
toolResponseScheduling?: FunctionResponseScheduling;
276308
} = {},
277309
) {
278310
const inputAudioTranscription =
@@ -329,6 +361,9 @@ export class RealtimeModel extends llm.RealtimeModel {
329361
contextWindowCompression: options.contextWindowCompression,
330362
apiVersion: options.apiVersion,
331363
geminiTools: options.geminiTools,
364+
toolBehavior: options.toolBehavior ?? Behavior.NON_BLOCKING,
365+
toolResponseScheduling:
366+
options.toolResponseScheduling ?? FunctionResponseScheduling.WHEN_IDLE,
332367
};
333368
}
334369

@@ -372,7 +407,7 @@ export class RealtimeSession extends llm.RealtimeSession {
372407
private _chatCtx = llm.ChatContext.empty();
373408

374409
private options: RealtimeOptions;
375-
private geminiDeclarations: types.FunctionDeclaration[] = [];
410+
private geminiDeclarations: FunctionDeclaration[] = [];
376411
private messageChannel = new Queue<api_proto.ClientEvents>();
377412
private inputResampler?: AudioResampler;
378413
private inputResamplerInputRate?: number;
@@ -421,7 +456,7 @@ export class RealtimeSession extends llm.RealtimeSession {
421456
timeout: this.options.connOptions.timeoutMs,
422457
};
423458

424-
const clientOptions: types.GoogleGenAIOptions = vertexai
459+
const clientOptions: GoogleGenAIOptions = vertexai
425460
? {
426461
vertexai: true,
427462
project,
@@ -463,15 +498,18 @@ export class RealtimeSession extends llm.RealtimeSession {
463498
private getToolResultsForRealtime(
464499
ctx: llm.ChatContext,
465500
vertexai: boolean,
466-
): types.LiveClientToolResponse | undefined {
467-
const toolResponses: types.FunctionResponse[] = [];
501+
): LiveClientToolResponse | undefined {
502+
const toolResponses: FunctionResponse[] = [];
468503

469504
for (const item of ctx.items) {
470505
if (item.type === 'function_call_output') {
471-
const response: types.FunctionResponse = {
506+
const response: FunctionResponse = {
472507
id: item.callId,
473508
name: item.name,
474-
response: { output: item.output },
509+
response: {
510+
output: item.output,
511+
scheduling: this.options.toolResponseScheduling,
512+
},
475513
};
476514

477515
if (!vertexai) {
@@ -552,7 +590,7 @@ export class RealtimeSession extends llm.RealtimeSession {
552590
this.sendClientEvent({
553591
type: 'content',
554592
value: {
555-
turns: turns as types.Content[],
593+
turns: turns as Content[],
556594
turnComplete: false,
557595
},
558596
});
@@ -572,7 +610,7 @@ export class RealtimeSession extends llm.RealtimeSession {
572610
}
573611

574612
async updateTools(tools: llm.ToolContext): Promise<void> {
575-
const newDeclarations = toFunctionDeclarations(tools);
613+
const newDeclarations = toFunctionDeclarations(tools, this.options.toolBehavior);
576614
const currentToolNames = new Set(this.geminiDeclarations.map((f) => f.name));
577615
const newToolNames = new Set(newDeclarations.map((f) => f.name));
578616

@@ -601,7 +639,7 @@ export class RealtimeSession extends llm.RealtimeSession {
601639

602640
for (const f of this.resampleAudio(frame)) {
603641
for (const nf of this.bstream.write(f.data.buffer)) {
604-
const realtimeInput: types.LiveClientRealtimeInput = {
642+
const realtimeInput: LiveClientRealtimeInput = {
605643
mediaChunks: [
606644
{
607645
mimeType: 'audio/pcm',
@@ -648,7 +686,7 @@ export class RealtimeSession extends llm.RealtimeSession {
648686

649687
// Gemini requires the last message to end with user's turn
650688
// so we need to add a placeholder user turn in order to trigger a new generation
651-
const turns: types.Content[] = [];
689+
const turns: Content[] = [];
652690
if (instructions !== undefined) {
653691
turns.push({
654692
parts: [{ text: instructions }],
@@ -752,7 +790,7 @@ export class RealtimeSession extends llm.RealtimeSession {
752790
model: this.options.model,
753791
callbacks: {
754792
onopen: () => sessionOpened.set(),
755-
onmessage: (message: types.LiveServerMessage) => {
793+
onmessage: (message: LiveServerMessage) => {
756794
this.onReceiveMessage(session, message);
757795
},
758796
onerror: (error: ErrorEvent) => {
@@ -846,7 +884,7 @@ export class RealtimeSession extends llm.RealtimeSession {
846884
}
847885
}
848886

849-
private async sendTask(session: types.Session, controller: AbortController): Promise<void> {
887+
private async sendTask(session: Session, controller: AbortController): Promise<void> {
850888
try {
851889
while (!this.#closed && !this.sessionShouldClose.isSet && !controller.signal.aborted) {
852890
const msg = await this.messageChannel.get();
@@ -912,8 +950,8 @@ export class RealtimeSession extends llm.RealtimeSession {
912950
}
913951

914952
private async onReceiveMessage(
915-
session: types.Session,
916-
response: types.LiveServerMessage,
953+
session: Session,
954+
response: LiveServerMessage,
917955
): Promise<void> {
918956
// Skip logging verbose audio data events
919957
const hasAudioData = response.serverContent?.modelTurn?.parts?.some(
@@ -1006,7 +1044,7 @@ export class RealtimeSession extends llm.RealtimeSession {
10061044
}
10071045

10081046
private loggableServerMessage(
1009-
message: types.LiveServerMessage,
1047+
message: LiveServerMessage,
10101048
maxLength: number = 30,
10111049
): Record<string, unknown> {
10121050
const obj: any = { ...message };
@@ -1090,10 +1128,10 @@ export class RealtimeSession extends llm.RealtimeSession {
10901128
});
10911129
}
10921130

1093-
private buildConnectConfig(): types.LiveConnectConfig {
1131+
private buildConnectConfig(): LiveConnectConfig {
10941132
const opts = this.options;
10951133

1096-
const config: types.LiveConnectConfig = {
1134+
const config: LiveConnectConfig = {
10971135
responseModalities: opts.responseModalities,
10981136
systemInstruction: opts.instructions
10991137
? {
@@ -1214,7 +1252,7 @@ export class RealtimeSession extends llm.RealtimeSession {
12141252
} as llm.InputSpeechStoppedEvent);
12151253
}
12161254

1217-
private handleServerContent(serverContent: types.LiveServerContent): void {
1255+
private handleServerContent(serverContent: LiveServerContent): void {
12181256
if (!this.currentGeneration) {
12191257
this.#logger.warn('received server content but no active generation.');
12201258
return;
@@ -1298,7 +1336,7 @@ export class RealtimeSession extends llm.RealtimeSession {
12981336
}
12991337
}
13001338

1301-
private handleToolCall(toolCall: types.LiveServerToolCall): void {
1339+
private handleToolCall(toolCall: LiveServerToolCall): void {
13021340
if (!this.currentGeneration) {
13031341
this.#logger.warn('received tool call but no active generation.');
13041342
return;
@@ -1317,7 +1355,7 @@ export class RealtimeSession extends llm.RealtimeSession {
13171355
this.markCurrentGenerationDone();
13181356
}
13191357

1320-
private handleToolCallCancellation(cancellation: types.LiveServerToolCallCancellation): void {
1358+
private handleToolCallCancellation(cancellation: LiveServerToolCallCancellation): void {
13211359
this.#logger.warn(
13221360
{
13231361
functionCallIds: cancellation.ids,
@@ -1326,7 +1364,7 @@ export class RealtimeSession extends llm.RealtimeSession {
13261364
);
13271365
}
13281366

1329-
private handleUsageMetadata(usage: types.UsageMetadata): void {
1367+
private handleUsageMetadata(usage: UsageMetadata): void {
13301368
if (!this.currentGeneration) {
13311369
this.#logger.debug('Received usage metadata but no active generation');
13321370
return;
@@ -1371,7 +1409,7 @@ export class RealtimeSession extends llm.RealtimeSession {
13711409
this.emit('metrics_collected', realtimeMetrics);
13721410
}
13731411

1374-
private tokenDetailsMap(tokenDetails: types.ModalityTokenCount[] | undefined): {
1412+
private tokenDetailsMap(tokenDetails: ModalityTokenCount[] | undefined): {
13751413
audioTokens: number;
13761414
textTokens: number;
13771415
imageTokens: number;
@@ -1386,18 +1424,18 @@ export class RealtimeSession extends llm.RealtimeSession {
13861424
continue;
13871425
}
13881426

1389-
if (tokenDetail.modality === types.MediaModality.AUDIO) {
1427+
if (tokenDetail.modality === MediaModality.AUDIO) {
13901428
tokenDetailsMap.audioTokens += tokenDetail.tokenCount;
1391-
} else if (tokenDetail.modality === types.MediaModality.TEXT) {
1429+
} else if (tokenDetail.modality === MediaModality.TEXT) {
13921430
tokenDetailsMap.textTokens += tokenDetail.tokenCount;
1393-
} else if (tokenDetail.modality === types.MediaModality.IMAGE) {
1431+
} else if (tokenDetail.modality === MediaModality.IMAGE) {
13941432
tokenDetailsMap.imageTokens += tokenDetail.tokenCount;
13951433
}
13961434
}
13971435
return tokenDetailsMap;
13981436
}
13991437

1400-
private handleGoAway(goAway: types.LiveServerGoAway): void {
1438+
private handleGoAway(goAway: LiveServerGoAway): void {
14011439
this.#logger.warn({ timeLeft: goAway.timeLeft }, 'Gemini server indicates disconnection soon.');
14021440
// TODO(brian): this isn't a seamless reconnection just yet
14031441
this.sessionShouldClose.set();

plugins/google/src/utils.ts

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
22
//
33
// SPDX-License-Identifier: Apache-2.0
4-
import type { FunctionDeclaration, Schema } from '@google/genai';
4+
import type { Behavior, FunctionDeclaration, Schema } from '@google/genai';
55
import { llm } from '@livekit/agents';
66
import type { JSONSchema7 } from 'json-schema';
77

@@ -136,7 +136,10 @@ function isEmptyObjectSchema(jsonSchema: JSONSchema7Definition): boolean {
136136
);
137137
}
138138

139-
export function toFunctionDeclarations(toolCtx: llm.ToolContext): FunctionDeclaration[] {
139+
export function toFunctionDeclarations(
140+
toolCtx: llm.ToolContext,
141+
behavior?: Behavior,
142+
): FunctionDeclaration[] {
140143
const functionDeclarations: FunctionDeclaration[] = [];
141144

142145
for (const [name, tool] of Object.entries(toolCtx)) {
@@ -146,11 +149,17 @@ export function toFunctionDeclarations(toolCtx: llm.ToolContext): FunctionDeclar
146149
// Create a deep copy to prevent the Google GenAI library from mutating the schema
147150
const schemaCopy = JSON.parse(JSON.stringify(jsonSchema));
148151

149-
functionDeclarations.push({
152+
const declaration: FunctionDeclaration = {
150153
name,
151154
description,
152155
parameters: convertJSONSchemaToOpenAPISchema(schemaCopy) as Schema,
153-
});
156+
};
157+
158+
if (behavior !== undefined) {
159+
declaration.behavior = behavior;
160+
}
161+
162+
functionDeclarations.push(declaration);
154163
}
155164

156165
return functionDeclarations;

0 commit comments

Comments
 (0)