31 changes: 31 additions & 0 deletions specification/_json_spec/inference.chat_completion_inference.json
@@ -0,0 +1,31 @@
{
"inference.chat_completion_unified": {
"documentation": {
"url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/chat-completion-inference.html",
"description": "Perform chat completion inference"
},
"stability": "stable",
"visibility": "public",
"headers": {
"accept": ["text/event-stream"],
"content_type": ["application/json"]
},
"url": {
"paths": [
{
"path": "/_inference/chat_completion/{inference_id}/_unified",
Member:

Shouldn't this be _stream?

Suggested change:
- "path": "/_inference/chat_completion/{inference_id}/_unified",
+ "path": "/_inference/chat_completion/{inference_id}/_stream",

Contributor Author:

No, it does have _unified at the end. I think technically we could remove it, since the client code doesn't need the results to be in a specific format for SSE.

"methods": ["POST"],
"parts": {
"inference_id": {
"type": "string",
"description": "The inference Id"
}
}
}
]
},
"body": {
"description": "The inference payload"
}
}
}
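Purely as illustration (not part of this spec), a minimal sketch of how a client might call this endpoint and consume the text/event-stream response; the host, inference ID, and message payload shape are assumptions:

  // Hypothetical TypeScript client sketch; host, inference ID, and message shape are assumed.
  async function chatCompletionStream(): Promise<void> {
    const resp = await fetch(
      'http://localhost:9200/_inference/chat_completion/my-chat-endpoint/_unified',
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json', Accept: 'text/event-stream' },
        // Assumed request payload shape, for illustration only.
        body: JSON.stringify({ messages: [{ role: 'user', content: 'Hello' }] })
      }
    )
    // The endpoint answers with Server-Sent Events, so read the body incrementally.
    const reader = resp.body!.getReader()
    const decoder = new TextDecoder()
    for (;;) {
      const { done, value } = await reader.read()
      if (done) break
      console.log(decoder.decode(value)) // raw "data: ..." SSE lines
    }
  }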
31 changes: 31 additions & 0 deletions specification/_json_spec/inference.completion.json
@@ -0,0 +1,31 @@
{
"inference.inference": {
"documentation": {
"url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
"description": "Perform completion inference"
},
"stability": "stable",
"visibility": "public",
"headers": {
"accept": ["application/json"],
"content_type": ["application/json"]
},
"url": {
"paths": [
{
"path": "/_inference/completion/{inference_id}",
"methods": ["POST"],
"parts": {
"inference_id": {
"type": "string",
"description": "The inference Id"
}
}
}
]
},
"body": {
"description": "The inference payload"
}
}
}
31 changes: 31 additions & 0 deletions specification/_json_spec/inference.rerank.json
@@ -0,0 +1,31 @@
{
"inference.inference": {
"documentation": {
"url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
"description": "Perform reranking inference"
},
"stability": "stable",
"visibility": "public",
"headers": {
"accept": ["application/json"],
"content_type": ["application/json"]
},
"url": {
"paths": [
{
"path": "/_inference/rerank/{inference_id}",
"methods": ["POST"],
"parts": {
"inference_id": {
"type": "string",
"description": "The inference Id"
}
}
}
]
},
"body": {
"description": "The inference payload"
}
}
}
31 changes: 31 additions & 0 deletions specification/_json_spec/inference.sparse_embedding.json
@@ -0,0 +1,31 @@
{
"inference.inference": {
"documentation": {
"url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
"description": "Perform sparse embedding inference"
},
"stability": "stable",
"visibility": "public",
"headers": {
"accept": ["application/json"],
"content_type": ["application/json"]
},
"url": {
"paths": [
{
"path": "/_inference/sparse_embedding/{inference_id}",
"methods": ["POST"],
"parts": {
"inference_id": {
"type": "string",
"description": "The inference Id"
}
}
}
]
},
"body": {
"description": "The inference payload"
}
}
}
@@ -1,5 +1,5 @@
{
"inference.stream_inference": {
"inference.stream_completion": {
Contributor Author:

In the future we might have a streaming endpoint for text embeddings, for example.

"documentation": {
"url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-stream-inference-api.html",
"description": "Perform streaming inference"
@@ -12,24 +12,10 @@
},
"url": {
"paths": [
{
"path": "/_inference/{inference_id}/_stream",
"methods": ["POST"],
"parts": {
"inference_id": {
"type": "string",
"description": "The inference Id"
}
}
},
{
"path": "/_inference/{task_type}/{inference_id}/_stream",
"methods": ["POST"],
"parts": {
"task_type": {
"type": "string",
"description": "The task type"
},
"inference_id": {
"type": "string",
"description": "The inference Id"
31 changes: 31 additions & 0 deletions specification/_json_spec/inference.text_embedding.json
@@ -0,0 +1,31 @@
{
"inference.inference": {
"documentation": {
"url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
"description": "Perform text embedding inference"
},
"stability": "stable",
"visibility": "public",
"headers": {
"accept": ["application/json"],
"content_type": ["application/json"]
},
"url": {
"paths": [
{
"path": "/_inference/text_embedding/{inference_id}",
"methods": ["POST"],
"parts": {
"inference_id": {
"type": "string",
"description": "The inference Id"
}
}
}
]
},
"body": {
"description": "The inference payload"
}
}
}
45 changes: 0 additions & 45 deletions specification/_json_spec/inference.unified_inference.json

This file was deleted.

31 changes: 31 additions & 0 deletions specification/inference/_types/Results.ts
@@ -37,6 +37,14 @@ export class SparseEmbeddingResult {
embedding: SparseVector
}

/**
* The response format for the sparse embedding request.
*/
export class SparseEmbeddingInferenceResult {
// TODO should we make this optional if we ever support multiple encoding types? So we can make it a variant
sparse_embedding: Array<SparseEmbeddingResult>
Contributor Author:

I could see us having a variant here for a different type of response (like byte encoding for text embedding). That would be returned using the same URL, so it wouldn't be a new response. Should we make this a variant and make sparse_embedding optional?

I suppose changing a field from required to optional in the future would be a breaking change, right?

Member:

For text embeddings, the pattern used in the InferenceResult class (also in this file) is to have a different variant for each type:

  text_embedding_bytes?: Array<TextEmbeddingByteResult>
  text_embedding_bits?: Array<TextEmbeddingByteResult>
  text_embedding?: Array<TextEmbeddingResult>

Sparse would be the same:

  sparse_embedding?: Array<SparseEmbeddingResult>
  sparse_embedding_byte?: Array<SparseEmbeddingByteResult>

Contributor Author:

Actually, when I make the sparse embedding type a variant, I get an error indicating that the type must have multiple fields in order to use the variant type. So I think we can make this change when we need to.

}

/**
* Text Embedding results containing bytes are represented as Dense
* Vectors of bytes.
@@ -57,13 +65,29 @@ export class TextEmbeddingResult {
embedding: DenseVector
}

/**
* TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants
* @variants container
*/
export class TextEmbeddingInferenceResult {
Contributor Author:

Same thing here: one URL, multiple response formats, so keeping this as it was.

text_embedding_bytes?: Array<TextEmbeddingByteResult>
text_embedding?: Array<TextEmbeddingResult>
}
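
As a side note, a minimal sketch (under the assumption that a generated client mirrors these optional fields) of how a consumer might narrow the variant container:

  // Hypothetical mirror of the variant container above, written out for illustration only.
  interface TextEmbeddingInferenceResultLike {
    text_embedding_bytes?: Array<{ embedding: number[] }>
    text_embedding?: Array<{ embedding: number[] }>
  }

  // Exactly one of the variant fields is expected to be populated on a given response.
  function firstEmbedding(result: TextEmbeddingInferenceResultLike): number[] | undefined {
    if (result.text_embedding) return result.text_embedding[0]?.embedding
    if (result.text_embedding_bytes) return result.text_embedding_bytes[0]?.embedding
    return undefined
  }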

/**
* The completion result object
*/
export class CompletionResult {
result: string
}

/**
* Defines the completion result.
*/
export class CompletionInferenceResult {
Contributor Author:

I'm open to other ideas for naming the classes. *Result was already taken for all of the nested-field classes, which is why I went with *InferenceResult.

completion: Array<CompletionResult>
}

/**
* The rerank result object representing a single ranked document
* id: the original index of the document in the request
@@ -76,6 +100,13 @@ export class RankedDocument {
text?: string
}

/**
* Defines the response for a rerank request.
*/
export class RerankedInferenceResult {
rerank: Array<RankedDocument>
}

/**
* InferenceResult is an aggregation of mutually exclusive variants
* @variants container
@@ -17,7 +17,6 @@
* under the License.
*/

import { TaskType } from '@inference/_types/TaskType'
import { UserDefinedValue } from '@spec_utils/UserDefinedValue'
import { RequestBase } from '@_types/Base'
import { Id } from '@_types/common'
@@ -33,19 +32,11 @@ import { Duration } from '@_types/Time'
export interface Request extends RequestBase {
urls: [
{
path: '/_inference/{inference_id}/_unified'
methods: ['POST']
},
{
path: '/_inference/{task_type}/{inference_id}/_unified'
path: '/_inference/chat_completion/{inference_id}/_unified'
methods: ['POST']
}
]
path_parts: {
/**
* The task type
*/
task_type?: TaskType
/**
* The inference Id
*/
62 changes: 62 additions & 0 deletions specification/inference/completion/CompletionRequest.ts
@@ -0,0 +1,62 @@
/*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

import { TaskSettings } from '@inference/_types/Services'
import { RequestBase } from '@_types/Base'
import { Id } from '@_types/common'
import { Duration } from '@_types/Time'

/**
* Perform inference on the service
* @rest_spec_name inference.inference
* @availability stack since=8.11.0 stability=stable visibility=public
* @availability serverless stability=stable visibility=public
*/
export interface Request extends RequestBase {
urls: [
{
path: '/_inference/completion/{inference_id}'
methods: ['POST']
}
]
path_parts: {
/**
* The inference Id
*/
inference_id: Id
}
query_parameters: {
/**
* Specifies the amount of time to wait for the inference request to complete.
* @server_default 30s
*/
timeout?: Duration
}
body: {
/**
* Inference input.
* Either a string or an array of strings.
*/
input: string | Array<string>
/**
* Optional task settings
*/
task_settings?: TaskSettings
}
}
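
For illustration only, a hedged sketch of a call matching this request shape; the host and inference ID are placeholders, and the response is assumed to follow CompletionInferenceResult:

  // Hypothetical client call; host and inference ID are placeholders, not from this spec.
  async function completion(inferenceId: string, input: string | string[]) {
    const resp = await fetch(
      `http://localhost:9200/_inference/completion/${encodeURIComponent(inferenceId)}?timeout=30s`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ input })
      }
    )
    // Expected (assumed) response shape: { completion: [{ result: '...' }] }
    return resp.json()
  }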