Skip to content

Commit

Permalink
[8.x] [inference] Add simulated function calling (elastic#192544) (elastic#193275)
Browse files Browse the repository at this point in the history

# Backport

This will backport the following commits from `main` to `8.x`:
- [[inference] Add simulated function calling
(elastic#192544)](elastic#192544)

<!--- Backport version: 9.4.3 -->

### Questions?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Pierre
Gayvallet","email":"[email protected]"},"sourceCommit":{"committedDate":"2024-09-18T10:42:28Z","message":"[inference]
Add simulated function calling (elastic#192544)\n\n## Summary\r\n\r\nAdd
simulated function calling to the inference plugin. For now, only\r\nthe
openAI adapter is supported. This is done by adding a new,
optional\r\n`functionCalling` parameter to the chat and task
APIs\r\n\r\nImplementation was adapted from the equivalent feature in
the o11y\r\nassistant.\r\n\r\n---------\r\n\r\nCo-authored-by: Dario
Gieselaar
<[email protected]>","sha":"181d61723136084ec57801fab1cc99457c047977","branchLabelMapping":{"^v9.0.0$":"main","^v8.16.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","v9.0.0","backport:prev-minor","Team:Obs
AI Assistant","ci:project-deploy-observability","v8.16.0","Team:AI
Infra"],"title":"[inference] Add simulated function calling
","number":192544,"url":"elastic#192544
Add simulated function calling (elastic#192544)\n\n## Summary\r\n\r\nAdd
simulated function calling to the inference plugin. For now, only\r\nthe
openAI adapter is supported. This is done by adding a new,
optional\r\n`functionCalling` parameter to the chat and task
APIs\r\n\r\nImplementation was adapted from the equivalent feature in
the o11y\r\nassistant.\r\n\r\n---------\r\n\r\nCo-authored-by: Dario
Gieselaar
<[email protected]>","sha":"181d61723136084ec57801fab1cc99457c047977"}},"sourceBranch":"main","suggestedTargetBranches":["8.x"],"targetPullRequestStates":[{"branch":"main","label":"v9.0.0","branchLabelMappingKey":"^v9.0.0$","isSourceBranch":true,"state":"MERGED","url":"elastic#192544
Add simulated function calling (elastic#192544)\n\n## Summary\r\n\r\nAdd
simulated function calling to the inference plugin. For now, only\r\nthe
openAI adapter is supported. This is done by adding a new,
optional\r\n`functionCalling` parameter to the chat and task
APIs\r\n\r\nImplementation was adapted from the equivalent feature in
the o11y\r\nassistant.\r\n\r\n---------\r\n\r\nCo-authored-by: Dario
Gieselaar
<[email protected]>","sha":"181d61723136084ec57801fab1cc99457c047977"}},{"branch":"8.x","label":"v8.16.0","branchLabelMappingKey":"^v8.16.0$","isSourceBranch":false,"state":"NOT_CREATED"}]}]
BACKPORT-->

Co-authored-by: Pierre Gayvallet <[email protected]>
  • Loading branch information
kibanamachine and pgayvallet committed Sep 18, 2024
1 parent 44abd1a commit c8043c9
Show file tree
Hide file tree
Showing 32 changed files with 472 additions and 33 deletions.
3 changes: 3 additions & 0 deletions x-pack/plugins/inference/common/chat_complete/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ export type ChatCompletionEvent<TToolOptions extends ToolOptions = ToolOptions>
| ChatCompletionTokenCountEvent
| ChatCompletionMessageEvent<TToolOptions>;

/**
 * Function-calling mode for chat completion requests.
 *
 * - `'native'`: rely on the connector/provider's built-in tool support.
 * - `'simulated'`: emulate tool calling through prompt instructions
 *   (see the `simulated_function_calling` helpers; only the openAI
 *   adapter wires this up in this change).
 */
export type FunctionCallingMode = 'native' | 'simulated';

/**
* Request a completion from the LLM based on a prompt or conversation.
*
Expand All @@ -92,5 +94,6 @@ export type ChatCompleteAPI = <TToolOptions extends ToolOptions = ToolOptions>(
connectorId: string;
system?: string;
messages: Message[];
functionCalling?: FunctionCallingMode;
} & TToolOptions
) => ChatCompletionResponse<TToolOptions>;
5 changes: 3 additions & 2 deletions x-pack/plugins/inference/common/chat_complete/request.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
* 2.0.
*/

import type { Message } from '.';
import { ToolOptions } from './tools';
import type { Message, FunctionCallingMode } from '.';
import type { ToolOptions } from './tools';

/**
 * Shape of the request body sent by the browser-side chat complete API
 * to the server route. Mirrors the public `ChatCompleteAPI` options,
 * plus tool options merged in via {@link ToolOptions}.
 */
export type ChatCompleteRequestBody = {
  /** Id of the connector that will execute the completion request. */
  connectorId: string;
  /** When true, the response is streamed back as events. */
  stream?: boolean;
  /** Optional system message prepended to the conversation. */
  system?: string;
  /** Conversation history to complete. */
  messages: Message[];
  /** Optional function-calling mode ('native' | 'simulated'); omitted = adapter default. */
  functionCalling?: FunctionCallingMode;
} & ToolOptions;
7 changes: 4 additions & 3 deletions x-pack/plugins/inference/common/output/create_output_api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@ import { OutputAPI, OutputEvent, OutputEventType } from '.';
import { ensureMultiTurn } from '../ensure_multi_turn';

export function createOutputApi(chatCompleteApi: ChatCompleteAPI): OutputAPI {
return (id, { connectorId, input, schema, system, previousMessages }) => {
return (id, { connectorId, input, schema, system, previousMessages, functionCalling }) => {
return chatCompleteApi({
connectorId,
system,
functionCalling,
messages: ensureMultiTurn([
...(previousMessages || []),
{
Expand All @@ -26,12 +27,12 @@ export function createOutputApi(chatCompleteApi: ChatCompleteAPI): OutputAPI {
...(schema
? {
tools: {
output: {
structuredOutput: {
description: `Use the following schema to respond to the user's request in structured data, so it can be parsed and handled.`,
schema,
},
},
toolChoice: { function: 'output' as const },
toolChoice: { function: 'structuredOutput' as const },
}
: {}),
}).pipe(
Expand Down
3 changes: 2 additions & 1 deletion x-pack/plugins/inference/common/output/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import { Observable } from 'rxjs';
import { ServerSentEventBase } from '@kbn/sse-utils';
import { FromToolSchema, ToolSchema } from '../chat_complete/tool_schema';
import { Message } from '../chat_complete';
import type { Message, FunctionCallingMode } from '../chat_complete';

export enum OutputEventType {
OutputUpdate = 'output',
Expand Down Expand Up @@ -61,6 +61,7 @@ export type OutputAPI = <
input: string;
schema?: TOutputSchema;
previousMessages?: Message[];
functionCalling?: FunctionCallingMode;
}
) => Observable<
OutputEvent<TId, TOutputSchema extends ToolSchema ? FromToolSchema<TOutputSchema> : undefined>
Expand Down
3 changes: 2 additions & 1 deletion x-pack/plugins/inference/public/chat_complete/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@ import type { ChatCompleteRequestBody } from '../../common/chat_complete/request
import { httpResponseIntoObservable } from '../util/http_response_into_observable';

export function createChatCompleteApi({ http }: { http: HttpStart }): ChatCompleteAPI {
return ({ connectorId, messages, system, toolChoice, tools }) => {
return ({ connectorId, messages, system, toolChoice, tools, functionCalling }) => {
const body: ChatCompleteRequestBody = {
connectorId,
system,
messages,
toolChoice,
tools,
functionCalling,
};

return from(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import type {
ChatCompletionToolMessageParam,
ChatCompletionUserMessageParam,
} from 'openai/resources';
import { filter, from, map, switchMap, tap, throwError } from 'rxjs';
import { filter, from, map, switchMap, tap, throwError, identity } from 'rxjs';
import { Readable, isReadable } from 'stream';
import {
ChatCompletionChunkEvent,
Expand All @@ -26,18 +26,38 @@ import { createTokenLimitReachedError } from '../../../../common/chat_complete/e
import { createInferenceInternalError } from '../../../../common/errors';
import { eventSourceStreamIntoObservable } from '../../../util/event_source_stream_into_observable';
import type { InferenceConnectorAdapter } from '../../types';
import {
wrapWithSimulatedFunctionCalling,
parseInlineFunctionCalls,
} from '../../simulated_function_calling';

export const openAIAdapter: InferenceConnectorAdapter = {
chatComplete: ({ executor, system, messages, toolChoice, tools }) => {
chatComplete: ({ executor, system, messages, toolChoice, tools, functionCalling, logger }) => {
const stream = true;
const simulatedFunctionCalling = functionCalling === 'simulated';

const request: Omit<OpenAI.ChatCompletionCreateParams, 'model'> & { model?: string } = {
stream,
messages: messagesToOpenAI({ system, messages }),
tool_choice: toolChoiceToOpenAI(toolChoice),
tools: toolsToOpenAI(tools),
temperature: 0,
};
let request: Omit<OpenAI.ChatCompletionCreateParams, 'model'> & { model?: string };
if (simulatedFunctionCalling) {
const wrapped = wrapWithSimulatedFunctionCalling({
system,
messages,
toolChoice,
tools,
});
request = {
stream,
messages: messagesToOpenAI({ system: wrapped.system, messages: wrapped.messages }),
temperature: 0,
};
} else {
request = {
stream,
messages: messagesToOpenAI({ system, messages }),
tool_choice: toolChoiceToOpenAI(toolChoice),
tools: toolsToOpenAI(tools),
temperature: 0,
};
}

return from(
executor.invoke({
Expand Down Expand Up @@ -94,7 +114,8 @@ export const openAIAdapter: InferenceConnectorAdapter = {
};
}) ?? [],
};
})
}),
simulatedFunctionCalling ? parseInlineFunctionCalls({ logger }) : identity
);
},
};
Expand Down
2 changes: 2 additions & 0 deletions x-pack/plugins/inference/server/chat_complete/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ export function createChatCompleteApi({
toolChoice,
tools,
system,
functionCalling,
}): ChatCompletionResponse => {
return defer(async () => {
const actionsClient = await actions.getActionsClientWithRequest(request);
Expand Down Expand Up @@ -58,6 +59,7 @@ export function createChatCompleteApi({
toolChoice,
tools,
logger,
functionCalling,
});
}),
chunksIntoMessage({
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

// Sentinel markers the system-message instructions tell the model to wrap
// around its inline JSON tool-call payload, so the response parser
// (parseInlineFunctionCalls) can locate and extract the call. Do not change
// these values without updating prompts/parsers that reference them.
export const TOOL_USE_START = '<|tool_use_start|>';
export const TOOL_USE_END = '<|tool_use_end|>';
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { TOOL_USE_END, TOOL_USE_START } from './constants';
import { ToolDefinition } from '../../../common/chat_complete/tools';

/**
 * Builds the system-message block that teaches the model how to perform
 * simulated function calling: which tools exist, and the exact
 * `TOOL_USE_START` / ```json / `TOOL_USE_END` syntax expected by the
 * inline-call parser. When no tools are provided, returns an instruction
 * forbidding any tool call.
 */
export function getSystemMessageInstructions({
  tools,
}: {
  tools?: Record<string, ToolDefinition>;
}) {
  // Flatten the name→definition record into a list carrying the name.
  const toolList = Object.entries(tools ?? {}).map(([name, definition]) => ({
    name,
    ...definition,
  }));

  // Guard clause: with no tools, explicitly forbid calling anything.
  if (toolList.length === 0) {
    return `No tools are available anymore. DO NOT UNDER ANY CIRCUMSTANCES call any tool, regardless of whether it was previously called.`;
  }

  // Worked examples shown to the model; hoisted so the template below
  // stays readable. JSON.stringify output is identical to inlining them.
  const exampleToolDefinition = JSON.stringify({
    name: 'my_tool',
    description: 'A tool to call',
    schema: {
      type: 'object',
      properties: {
        myProperty: {
          type: 'string',
        },
      },
    },
  });
  const exampleToolCall = JSON.stringify({ name: 'my_tool', input: { myProperty: 'myValue' } });

  const exampleBareToolDefinition = JSON.stringify({
    name: 'my_tool_without_parameters',
    description: 'A tool to call without parameters',
  });
  const exampleBareToolCall = JSON.stringify({ name: 'my_tool_without_parameters', input: {} });

  // Only expose name/description/schema to the model; schema is omitted
  // entirely (not set to undefined) when the tool has none.
  const availableToolsJson = JSON.stringify(
    toolList.map((tool) => ({
      name: tool.name,
      description: tool.description,
      ...(tool.schema ? { schema: tool.schema } : {}),
    }))
  );

  // NOTE: the lone "\" after each closing ``` fence is a deliberate
  // line continuation so TOOL_USE_END follows the fence with no newline.
  return `In this environment, you have access to a set of tools you can use to answer the user's question.
DO NOT call a tool when it is not listed.
ONLY define input that is defined in the tool properties.
If a tool does not have properties, leave them out.
It is EXTREMELY important that you generate valid JSON between the \`\`\`json and \`\`\` delimiters.
You may call them like this.
Given the following tool:
${exampleToolDefinition}
Use it the following way:
${TOOL_USE_START}
\`\`\`json
${exampleToolCall}
\`\`\`\
${TOOL_USE_END}
Given the following tool:
${exampleBareToolDefinition}
Use it the following way:
${TOOL_USE_START}
\`\`\`json
${exampleBareToolCall}
\`\`\`\
${TOOL_USE_END}
Here are the tools available:
${availableToolsJson}
`;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

// Barrel exports for the simulated function-calling feature:
// - wrapWithSimulatedFunctionCalling: rewrites system/messages/tools into
//   prompt instructions before sending the request to the LLM.
// - parseInlineFunctionCalls: operator that extracts inline tool calls
//   from the streamed response.
export { wrapWithSimulatedFunctionCalling } from './wrap_with_simulated_function_calling';
export { parseInlineFunctionCalls } from './parse_inline_function_calls';
Loading

0 comments on commit c8043c9

Please sign in to comment.