-
Notifications
You must be signed in to change notification settings - Fork 115
Inference task type endpoints #3545
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
687063f
0442e31
05864d4
78ce8a1
749c78c
02219ba
bf22312
bd16539
797d6b5
bc1a277
14edb60
2aaebdf
00e81e5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| { | ||
| "inference.chat_completion_unified": { | ||
| "documentation": { | ||
| "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/chat-completion-inference.html", | ||
| "description": "Perform chat completion inference" | ||
| }, | ||
| "stability": "stable", | ||
| "visibility": "public", | ||
| "headers": { | ||
| "accept": ["text/event-stream"], | ||
| "content_type": ["application/json"] | ||
| }, | ||
| "url": { | ||
| "paths": [ | ||
| { | ||
| "path": "/_inference/chat_completion/{inference_id}/_unified", | ||
| "methods": ["POST"], | ||
| "parts": { | ||
| "inference_id": { | ||
| "type": "string", | ||
| "description": "The inference Id" | ||
| } | ||
| } | ||
| } | ||
| ] | ||
| }, | ||
| "body": { | ||
| "description": "The inference payload" | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| { | ||
| "inference.inference": { | ||
| "documentation": { | ||
| "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", | ||
| "description": "Perform completion inference" | ||
| }, | ||
| "stability": "stable", | ||
| "visibility": "public", | ||
| "headers": { | ||
| "accept": ["application/json"], | ||
| "content_type": ["application/json"] | ||
| }, | ||
| "url": { | ||
| "paths": [ | ||
| { | ||
| "path": "/_inference/completion/{inference_id}", | ||
| "methods": ["POST"], | ||
| "parts": { | ||
| "inference_id": { | ||
| "type": "string", | ||
| "description": "The inference Id" | ||
| } | ||
| } | ||
| } | ||
| ] | ||
| }, | ||
| "body": { | ||
| "description": "The inference payload" | ||
| } | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| { | ||
| "inference.inference": { | ||
| "documentation": { | ||
| "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", | ||
| "description": "Perform reranking inference" | ||
| }, | ||
| "stability": "stable", | ||
| "visibility": "public", | ||
| "headers": { | ||
| "accept": ["application/json"], | ||
| "content_type": ["application/json"] | ||
| }, | ||
| "url": { | ||
| "paths": [ | ||
| { | ||
| "path": "/_inference/rerank/{inference_id}", | ||
| "methods": ["POST"], | ||
| "parts": { | ||
| "inference_id": { | ||
| "type": "string", | ||
| "description": "The inference Id" | ||
| } | ||
| } | ||
| } | ||
| ] | ||
| }, | ||
| "body": { | ||
| "description": "The inference payload" | ||
| } | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| { | ||
| "inference.inference": { | ||
| "documentation": { | ||
| "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", | ||
| "description": "Perform sparse embedding inference" | ||
| }, | ||
| "stability": "stable", | ||
| "visibility": "public", | ||
| "headers": { | ||
| "accept": ["application/json"], | ||
| "content_type": ["application/json"] | ||
| }, | ||
| "url": { | ||
| "paths": [ | ||
| { | ||
| "path": "/_inference/sparse_embedding/{inference_id}", | ||
| "methods": ["POST"], | ||
| "parts": { | ||
| "inference_id": { | ||
| "type": "string", | ||
| "description": "The inference Id" | ||
| } | ||
| } | ||
| } | ||
| ] | ||
| }, | ||
| "body": { | ||
| "description": "The inference payload" | ||
| } | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,5 @@ | ||
| { | ||
| "inference.stream_inference": { | ||
| "inference.stream_completion": { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In the future we might have a streaming endpoint for text embeddings, for example. |
||
| "documentation": { | ||
| "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-stream-inference-api.html", | ||
| "description": "Perform streaming inference" | ||
|
|
@@ -12,24 +12,10 @@ | |
| }, | ||
| "url": { | ||
| "paths": [ | ||
| { | ||
| "path": "/_inference/{inference_id}/_stream", | ||
| "methods": ["POST"], | ||
| "parts": { | ||
| "inference_id": { | ||
| "type": "string", | ||
| "description": "The inference Id" | ||
| } | ||
| } | ||
| }, | ||
| { | ||
| "path": "/_inference/{task_type}/{inference_id}/_stream", | ||
| "methods": ["POST"], | ||
| "parts": { | ||
| "task_type": { | ||
| "type": "string", | ||
| "description": "The task type" | ||
| }, | ||
| "inference_id": { | ||
| "type": "string", | ||
| "description": "The inference Id" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| { | ||
| "inference.inference": { | ||
| "documentation": { | ||
| "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", | ||
| "description": "Perform text embedding inference" | ||
| }, | ||
| "stability": "stable", | ||
| "visibility": "public", | ||
| "headers": { | ||
| "accept": ["application/json"], | ||
| "content_type": ["application/json"] | ||
| }, | ||
| "url": { | ||
| "paths": [ | ||
| { | ||
| "path": "/_inference/text_embedding/{inference_id}", | ||
| "methods": ["POST"], | ||
| "parts": { | ||
| "inference_id": { | ||
| "type": "string", | ||
| "description": "The inference Id" | ||
| } | ||
| } | ||
| } | ||
| ] | ||
| }, | ||
| "body": { | ||
| "description": "The inference payload" | ||
| } | ||
| } | ||
| } |
This file was deleted.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -37,6 +37,14 @@ export class SparseEmbeddingResult { | |
| embedding: SparseVector | ||
| } | ||
|
|
||
| /** | ||
| * The response format for the sparse embedding request. | ||
| */ | ||
| export class SparseEmbeddingInferenceResult { | ||
| // TODO should we make this optional if we ever support multiple encoding types? So we can make it a variant | ||
| sparse_embedding: Array<SparseEmbeddingResult> | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I could see us having a variant here for a different type of response (like byte encoding for text embedding). That would be returned using the same URL, so it wouldn't be a new response. Should we make this a variant and make `sparse_embedding` optional? I suppose changing something from required to optional in the future would be a breaking change, right?
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For text embeddings, the pattern used in the Sparse would be the same:
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Actually, when I make the sparse embedding type a variant, I get an error that indicates there must be multiple fields in the type to be able to leverage the variant type. So I think we can make this change when we need to. |
||
| } | ||
|
|
||
| /** | ||
| * Text Embedding results containing bytes are represented as Dense | ||
| * Vectors of bytes. | ||
|
|
@@ -57,13 +65,29 @@ export class TextEmbeddingResult { | |
| embedding: DenseVector | ||
| } | ||
|
|
||
| /** | ||
| * TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants | ||
| * @variants container | ||
| */ | ||
| export class TextEmbeddingInferenceResult { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same thing here: one URL, multiple response formats, so keeping this as it was. |
||
| text_embedding_bytes?: Array<TextEmbeddingByteResult> | ||
| text_embedding?: Array<TextEmbeddingResult> | ||
| } | ||
|
|
||
| /** | ||
| * The completion result object | ||
| */ | ||
| export class CompletionResult { | ||
| result: string | ||
| } | ||
|
|
||
| /** | ||
| * Defines the completion result. | ||
| */ | ||
| export class CompletionInferenceResult { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm open to other ideas for naming the classes. |
||
| completion: Array<CompletionResult> | ||
| } | ||
|
|
||
| /** | ||
| * The rerank result object representing a single ranked document | ||
| * id: the original index of the document in the request | ||
|
|
@@ -76,6 +100,13 @@ export class RankedDocument { | |
| text?: string | ||
| } | ||
|
|
||
| /** | ||
| * Defines the response for a rerank request. | ||
| */ | ||
| export class RerankedInferenceResult { | ||
| rerank: Array<RankedDocument> | ||
| } | ||
|
|
||
| /** | ||
| * InferenceResult is an aggregation of mutually exclusive variants | ||
| * @variants container | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| /* | ||
| * Licensed to Elasticsearch B.V. under one or more contributor | ||
| * license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright | ||
| * ownership. Elasticsearch B.V. licenses this file to you under | ||
| * the Apache License, Version 2.0 (the "License"); you may | ||
| * not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
|
|
||
| import { TaskSettings } from '@inference/_types/Services' | ||
| import { RequestBase } from '@_types/Base' | ||
| import { Id } from '@_types/common' | ||
| import { Duration } from '@_types/Time' | ||
|
|
||
| /** | ||
| * Perform inference on the service | ||
| * @rest_spec_name inference.inference | ||
| * @availability stack since=8.11.0 stability=stable visibility=public | ||
| * @availability serverless stability=stable visibility=public | ||
| */ | ||
| export interface Request extends RequestBase { | ||
| urls: [ | ||
| { | ||
| path: '/_inference/completion/{inference_id}' | ||
| methods: ['POST'] | ||
| } | ||
| ] | ||
| path_parts: { | ||
| /** | ||
| * The inference Id | ||
| */ | ||
| inference_id: Id | ||
| } | ||
| query_parameters: { | ||
| /** | ||
| * Specifies the amount of time to wait for the inference request to complete. | ||
| * @server_default 30s | ||
| */ | ||
| timeout?: Duration | ||
| } | ||
| body: { | ||
| /** | ||
| * Inference input. | ||
| * Either a string or an array of strings. | ||
| */ | ||
| input: string | Array<string> | ||
| /** | ||
| * Optional task settings | ||
| */ | ||
| task_settings?: TaskSettings | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't this be `_stream`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, it does have `_unified` at the end. I think technically we could remove it, since the client code doesn't need the results to be in a specific format for SSE.