31 changes: 31 additions & 0 deletions specification/_json_spec/inference.chat_completion_inference.json
@@ -0,0 +1,31 @@
{
"inference.chat_completion_unified": {
"documentation": {
"url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/chat-completion-inference.html",
"description": "Perform chat completion inference"
},
"stability": "stable",
"visibility": "public",
"headers": {
"accept": ["text/event-stream"],
"content_type": ["application/json"]
},
"url": {
"paths": [
{
"path": "/_inference/chat_completion/{inference_id}/_unified",
Member:

Shouldn't this be _stream?

Suggested change:
- "path": "/_inference/chat_completion/{inference_id}/_unified",
+ "path": "/_inference/chat_completion/{inference_id}/_stream",

Contributor Author:

No, it does have _unified at the end. I think technically we could remove it, since the client code doesn't need the results to be in a specific format for SSE.

"methods": ["POST"],
"parts": {
"inference_id": {
"type": "string",
"description": "The inference Id"
}
}
}
]
},
"body": {
"description": "The inference payload"
}
}
}
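Purely as illustration (not part of this spec), a minimal sketch of how a client might call this endpoint and consume the text/event-stream response; the host, inference ID, and message payload shape are assumptions:

  // Hypothetical TypeScript client sketch; host, inference ID, and message shape are assumed.
  async function chatCompletionStream(): Promise<void> {
    const resp = await fetch(
      'http://localhost:9200/_inference/chat_completion/my-chat-endpoint/_unified',
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json', Accept: 'text/event-stream' },
        // Assumed request payload shape, for illustration only.
        body: JSON.stringify({ messages: [{ role: 'user', content: 'Hello' }] })
      }
    )
    // The endpoint answers with Server-Sent Events, so read the body incrementally.
    const reader = resp.body!.getReader()
    const decoder = new TextDecoder()
    for (;;) {
      const { done, value } = await reader.read()
      if (done) break
      console.log(decoder.decode(value)) // raw "data: ..." SSE lines
    }
  }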
31 changes: 31 additions & 0 deletions specification/_json_spec/inference.completion.json
@@ -0,0 +1,31 @@
{
"inference.inference": {
"documentation": {
"url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
"description": "Perform completion inference"
},
"stability": "stable",
"visibility": "public",
"headers": {
"accept": ["application/json"],
"content_type": ["application/json"]
},
"url": {
"paths": [
{
"path": "/_inference/completion/{inference_id}",
"methods": ["POST"],
"parts": {
"inference_id": {
"type": "string",
"description": "The inference Id"
}
}
}
]
},
"body": {
"description": "The inference payload"
}
}
}
31 changes: 31 additions & 0 deletions specification/_json_spec/inference.rerank.json
@@ -0,0 +1,31 @@
{
"inference.inference": {
"documentation": {
"url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
"description": "Perform reranking inference"
},
"stability": "stable",
"visibility": "public",
"headers": {
"accept": ["application/json"],
"content_type": ["application/json"]
},
"url": {
"paths": [
{
"path": "/_inference/rerank/{inference_id}",
"methods": ["POST"],
"parts": {
"inference_id": {
"type": "string",
"description": "The inference Id"
}
}
}
]
},
"body": {
"description": "The inference payload"
}
}
}
31 changes: 31 additions & 0 deletions specification/_json_spec/inference.sparse_embedding.json
@@ -0,0 +1,31 @@
{
"inference.inference": {
"documentation": {
"url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
"description": "Perform sparse embedding inference"
},
"stability": "stable",
"visibility": "public",
"headers": {
"accept": ["application/json"],
"content_type": ["application/json"]
},
"url": {
"paths": [
{
"path": "/_inference/sparse_embedding/{inference_id}",
"methods": ["POST"],
"parts": {
"inference_id": {
"type": "string",
"description": "The inference Id"
}
}
}
]
},
"body": {
"description": "The inference payload"
}
}
}
@@ -1,5 +1,5 @@
{
"inference.stream_inference": {
"inference.stream_completion": {
Contributor Author:

In the future we might have a streaming endpoint for text embeddings, for example.

"documentation": {
"url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-stream-inference-api.html",
"description": "Perform streaming inference"
@@ -12,24 +12,10 @@
},
"url": {
"paths": [
{
"path": "/_inference/{inference_id}/_stream",
"methods": ["POST"],
"parts": {
"inference_id": {
"type": "string",
"description": "The inference Id"
}
}
},
{
"path": "/_inference/{task_type}/{inference_id}/_stream",
"methods": ["POST"],
"parts": {
"task_type": {
"type": "string",
"description": "The task type"
},
"inference_id": {
"type": "string",
"description": "The inference Id"
31 changes: 31 additions & 0 deletions specification/_json_spec/inference.text_embedding.json
@@ -0,0 +1,31 @@
{
"inference.inference": {
"documentation": {
"url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
"description": "Perform text embedding inference"
},
"stability": "stable",
"visibility": "public",
"headers": {
"accept": ["application/json"],
"content_type": ["application/json"]
},
"url": {
"paths": [
{
"path": "/_inference/text_embedding/{inference_id}",
"methods": ["POST"],
"parts": {
"inference_id": {
"type": "string",
"description": "The inference Id"
}
}
}
]
},
"body": {
"description": "The inference payload"
}
}
}
45 changes: 0 additions & 45 deletions specification/_json_spec/inference.unified_inference.json

This file was deleted.

31 changes: 31 additions & 0 deletions specification/inference/_types/Results.ts
@@ -37,6 +37,14 @@ export class SparseEmbeddingResult {
embedding: SparseVector
}

/**
* The response format for the sparse embedding request.
*/
export class SparseEmbeddingInferenceResult {
// TODO should we make this optional if we ever support multiple encoding types? So we can make it a variant
sparse_embedding: Array<SparseEmbeddingResult>
Contributor Author:

I could see us having a variant here for a different type of response (like byte encoding for text embedding). That would be returned using the same URL, so it wouldn't be a new response. Should we make this a variant and make sparse_embedding optional?

I suppose changing a field from required to optional in the future would be a breaking change, right?

Member:

For text embeddings, the pattern used in the InferenceResult class (also in this file) is to have a different variant for each type:

  text_embedding_bytes?: Array<TextEmbeddingByteResult>
  text_embedding_bits?: Array<TextEmbeddingByteResult>
  text_embedding?: Array<TextEmbeddingResult>

Sparse would be the same:

  sparse_embedding?: Array<SparseEmbeddingResult>
  sparse_embedding_byte?: Array<SparseEmbeddingByteResult>

Contributor Author:

Actually, when I make the sparse embedding type a variant, I get an error indicating that the type must have multiple fields in order to use the variant type. So I think we can make this change when we need to.

}

/**
* Text Embedding results containing bytes are represented as Dense
* Vectors of bytes.
@@ -57,13 +65,29 @@ export class TextEmbeddingResult {
embedding: DenseVector
}

/**
* TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants
* @variants container
*/
export class TextEmbeddingInferenceResult {
Contributor Author:

Same thing here: one URL, multiple response formats, so keeping this as it was.

text_embedding_bytes?: Array<TextEmbeddingByteResult>
text_embedding?: Array<TextEmbeddingResult>
}
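
As a side note, a minimal sketch (under the assumption that a generated client mirrors these optional fields) of how a consumer might narrow the variant container:

  // Hypothetical mirror of the variant container above, written out for illustration only.
  interface TextEmbeddingInferenceResultLike {
    text_embedding_bytes?: Array<{ embedding: number[] }>
    text_embedding?: Array<{ embedding: number[] }>
  }

  // Exactly one of the variant fields is expected to be populated on a given response.
  function firstEmbedding(result: TextEmbeddingInferenceResultLike): number[] | undefined {
    if (result.text_embedding) return result.text_embedding[0]?.embedding
    if (result.text_embedding_bytes) return result.text_embedding_bytes[0]?.embedding
    return undefined
  }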

/**
* The completion result object
*/
export class CompletionResult {
result: string
}

/**
* Defines the completion result.
*/
export class CompletionInferenceResult {
Contributor Author:

I'm open to other ideas for naming the classes. *Result was already taken for all of the nested-field classes, which is why I went with *InferenceResult.

completion: Array<CompletionResult>
}

/**
* The rerank result object representing a single ranked document
* id: the original index of the document in the request
@@ -76,6 +100,13 @@ export class RankedDocument {
text?: string
}

/**
* Defines the response for a rerank request.
*/
export class RerankedInferenceResult {
rerank: Array<RankedDocument>
}

/**
* InferenceResult is an aggregation of mutually exclusive variants
* @variants container
@@ -17,7 +17,6 @@
* under the License.
*/

import { TaskType } from '@inference/_types/TaskType'
import { UserDefinedValue } from '@spec_utils/UserDefinedValue'
import { RequestBase } from '@_types/Base'
import { Id } from '@_types/common'
@@ -33,19 +32,11 @@ import { Duration } from '@_types/Time'
export interface Request extends RequestBase {
urls: [
{
path: '/_inference/{inference_id}/_unified'
methods: ['POST']
},
{
path: '/_inference/{task_type}/{inference_id}/_unified'
path: '/_inference/chat_completion/{inference_id}/_unified'
methods: ['POST']
}
]
path_parts: {
/**
* The task type
*/
task_type?: TaskType
/**
* The inference Id
*/
62 changes: 62 additions & 0 deletions specification/inference/completion/CompletionRequest.ts
@@ -0,0 +1,62 @@
/*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

import { TaskSettings } from '@inference/_types/Services'
import { RequestBase } from '@_types/Base'
import { Id } from '@_types/common'
import { Duration } from '@_types/Time'

/**
* Perform inference on the service
* @rest_spec_name inference.inference
* @availability stack since=8.11.0 stability=stable visibility=public
* @availability serverless stability=stable visibility=public
*/
export interface Request extends RequestBase {
urls: [
{
path: '/_inference/completion/{inference_id}'
methods: ['POST']
}
]
path_parts: {
/**
* The inference Id
*/
inference_id: Id
}
query_parameters: {
/**
* Specifies the amount of time to wait for the inference request to complete.
* @server_default 30s
*/
timeout?: Duration
}
body: {
/**
* Inference input.
* Either a string or an array of strings.
*/
input: string | Array<string>
/**
* Optional task settings
*/
task_settings?: TaskSettings
}
}
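
For illustration only, a hedged sketch of a call matching this request shape; the host and inference ID are placeholders, and the response is assumed to follow CompletionInferenceResult:

  // Hypothetical client call; host and inference ID are placeholders, not from this spec.
  async function completion(inferenceId: string, input: string | string[]) {
    const resp = await fetch(
      `http://localhost:9200/_inference/completion/${encodeURIComponent(inferenceId)}?timeout=30s`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ input })
      }
    )
    // Expected (assumed) response shape: { completion: [{ result: '...' }] }
    return resp.json()
  }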