From 90de26ac3fb7972bec4d03395a46bbb4fad6341a Mon Sep 17 00:00:00 2001 From: Jan Kazlouski Date: Thu, 23 Oct 2025 14:12:25 +0000 Subject: [PATCH 1/4] Add support for multiple Google Model Garden providers for completion and chat_completion tasks --- output/openapi/elasticsearch-openapi.json | 16 +++- .../elasticsearch-serverless-openapi.json | 16 +++- output/schema/schema.json | 90 ++++++++++++------- output/typescript/types.ts | 2 +- specification/inference/_types/CommonTypes.ts | 6 +- .../PutGoogleVertexAiRequestExample5.yaml | 14 +++ .../PutGoogleVertexAiRequestExample6.yaml | 14 +++ 7 files changed, 121 insertions(+), 37 deletions(-) create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 2d2b1bf4d2..aa5ce8f31e 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -22760,6 +22760,16 @@ "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample5": { + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample6": { + "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" } } } @@ -103015,7 +103025,11 @@ "type": "string", "enum": [ "google", - "anthropic" + "anthropic", + "meta", + "hugging_face", + "mistral", + "ai21" ] }, "inference._types.GoogleVertexAITaskSettings": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 7ce70fcc14..3821fbb172 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ 
b/output/openapi/elasticsearch-serverless-openapi.json @@ -13742,6 +13742,16 @@ "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample5": { + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample6": { + "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" } } } @@ -67095,7 +67105,11 @@ "type": "string", "enum": [ "google", - "anthropic" + "anthropic", + "meta", + "hugging_face", + "mistral", + "ai21" ] }, "inference._types.GoogleVertexAITaskSettings": { diff --git a/output/schema/schema.json b/output/schema/schema.json index 3081b4bcce..fac8f24cbd 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -173219,13 +173219,25 @@ }, { "name": "anthropic" + }, + { + "name": "meta" + }, + { + "name": "hugging_face" + }, + { + "name": "mistral" + }, + { + "name": "ai21" } ], "name": { "name": "GoogleModelGardenProvider", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1498-L1501" + "specLocation": "inference/_types/CommonTypes.ts#L1498-L1505" }, { "kind": "interface", @@ -173362,7 +173374,7 @@ "name": "GoogleVertexAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1542-L1544" + "specLocation": "inference/_types/CommonTypes.ts#L1546-L1548" }, { "kind": "interface", @@ -173424,7 +173436,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1503-L1526" + "specLocation": "inference/_types/CommonTypes.ts#L1507-L1530" }, { "kind": "enum", @@ -173446,7 +173458,7 @@ "name": "GoogleVertexAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1535-L1540" + "specLocation": "inference/_types/CommonTypes.ts#L1539-L1544" }, { "kind": "interface", @@ -173508,7 +173520,7 @@ } } ], - "specLocation": 
"inference/_types/CommonTypes.ts#L1546-L1578" + "specLocation": "inference/_types/CommonTypes.ts#L1550-L1582" }, { "kind": "enum", @@ -173521,7 +173533,7 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1599-L1601" + "specLocation": "inference/_types/CommonTypes.ts#L1603-L1605" }, { "kind": "interface", @@ -173555,7 +173567,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1580-L1590" + "specLocation": "inference/_types/CommonTypes.ts#L1584-L1594" }, { "kind": "enum", @@ -173577,7 +173589,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1592-L1597" + "specLocation": "inference/_types/CommonTypes.ts#L1596-L1601" }, { "kind": "interface", @@ -174809,7 +174821,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1603-L1632" + "specLocation": "inference/_types/CommonTypes.ts#L1607-L1636" }, { "kind": "enum", @@ -174822,7 +174834,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1662-L1664" + "specLocation": "inference/_types/CommonTypes.ts#L1666-L1668" }, { "kind": "enum", @@ -174841,7 +174853,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1666-L1670" + "specLocation": "inference/_types/CommonTypes.ts#L1670-L1674" }, { "kind": "interface", @@ -174887,7 +174899,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1634-L1655" + "specLocation": "inference/_types/CommonTypes.ts#L1638-L1659" }, { "kind": "enum", @@ -174903,7 +174915,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1657-L1660" + "specLocation": "inference/_types/CommonTypes.ts#L1661-L1664" }, { "kind": "enum", @@ -174925,7 +174937,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1672-L1677" + "specLocation": "inference/_types/CommonTypes.ts#L1676-L1681" }, { "kind": "interface", @@ -174997,7 +175009,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1679-L1709" + "specLocation": "inference/_types/CommonTypes.ts#L1683-L1713" }, { "kind": "enum", @@ -175010,7 +175022,7 @@ "name": "LlamaServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1717-L1719" + "specLocation": "inference/_types/CommonTypes.ts#L1721-L1723" }, { "kind": "enum", @@ -175029,7 +175041,7 @@ "name": "LlamaSimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1721-L1725" + "specLocation": "inference/_types/CommonTypes.ts#L1725-L1729" }, { "kind": "enum", @@ -175048,7 +175060,7 @@ "name": "LlamaTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1711-L1715" + "specLocation": "inference/_types/CommonTypes.ts#L1715-L1719" }, { "kind": "interface", @@ -175206,7 +175218,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1727-L1754" + "specLocation": "inference/_types/CommonTypes.ts#L1731-L1758" }, { "kind": "enum", @@ -175219,7 +175231,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1762-L1764" + "specLocation": "inference/_types/CommonTypes.ts#L1766-L1768" }, { "kind": "enum", @@ -175238,7 +175250,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, 
- "specLocation": "inference/_types/CommonTypes.ts#L1756-L1760" + "specLocation": "inference/_types/CommonTypes.ts#L1760-L1764" }, { "kind": "interface", @@ -175325,7 +175337,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1766-L1808" + "specLocation": "inference/_types/CommonTypes.ts#L1770-L1812" }, { "kind": "enum", @@ -175338,7 +175350,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1835-L1837" + "specLocation": "inference/_types/CommonTypes.ts#L1839-L1841" }, { "kind": "interface", @@ -175368,7 +175380,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1810-L1827" + "specLocation": "inference/_types/CommonTypes.ts#L1814-L1831" }, { "kind": "enum", @@ -175387,7 +175399,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1829-L1833" + "specLocation": "inference/_types/CommonTypes.ts#L1833-L1837" }, { "kind": "interface", @@ -176233,7 +176245,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1528-L1533" + "specLocation": "inference/_types/CommonTypes.ts#L1532-L1537" }, { "kind": "interface", @@ -176379,7 +176391,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1839-L1870" + "specLocation": "inference/_types/CommonTypes.ts#L1843-L1874" }, { "kind": "enum", @@ -176392,7 +176404,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1903-L1905" + "specLocation": "inference/_types/CommonTypes.ts#L1907-L1909" }, { "kind": "interface", @@ -176452,7 +176464,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1872-L1896" + "specLocation": "inference/_types/CommonTypes.ts#L1876-L1900" }, { "kind": "enum", @@ -176468,7 +176480,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1898-L1901" + "specLocation": "inference/_types/CommonTypes.ts#L1902-L1905" }, { "kind": "interface", @@ -176556,7 +176568,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1907-L1945" + "specLocation": "inference/_types/CommonTypes.ts#L1911-L1949" }, { "kind": "enum", @@ -176569,7 +176581,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1953-L1955" + "specLocation": "inference/_types/CommonTypes.ts#L1957-L1959" }, { "kind": "enum", @@ -176588,7 +176600,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1947-L1951" + "specLocation": "inference/_types/CommonTypes.ts#L1951-L1955" }, { "kind": "request", @@ -180361,6 +180373,18 @@ "method_request": "PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion", "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample5": { + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", + "method_request": "PUT 
_inference/completion/google_model_garden_meta_completion", + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample6": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", + "method_request": "PUT _inference/chat_completion/google_model_garden_meta_chat_completion", + "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" } }, "inherits": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index f04b4076bc..2a93c1c4e7 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -14248,7 +14248,7 @@ export interface InferenceGoogleAiStudioServiceSettings { export type InferenceGoogleAiStudioTaskType = 'completion' | 'text_embedding' -export type InferenceGoogleModelGardenProvider = 'google' | 'anthropic' +export type InferenceGoogleModelGardenProvider = 'google' | 'anthropic' | 'meta' | 'hugging_face' | 'mistral' | 'ai21' export interface InferenceGoogleVertexAIServiceSettings { provider?: InferenceGoogleModelGardenProvider diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 10f45fd4d7..91cb66d192 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -1497,7 +1497,11 @@ export class GoogleVertexAIServiceSettings { export enum GoogleModelGardenProvider { google, - anthropic + anthropic, + meta, + hugging_face, + mistral, + ai21 } export class GoogleVertexAITaskSettings { diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml new file mode 100644 index 0000000000..750fc0eaf7 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml @@ -0,0 +1,14 @@ +summary: A completion task for Google Model Garden Meta endpoint with single URL provided +description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided. 
+method_request: 'PUT _inference/completion/google_model_garden_meta_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "meta", + "model_id": "meta/llama-3.3-70b-instruct-maas", + "service_account_json": "service-account-json", + "url": "https://url/openapi/chat/completions" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml new file mode 100644 index 0000000000..8d38a23ca5 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml @@ -0,0 +1,14 @@ +summary: A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided +description: Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided. +method_request: 'PUT _inference/chat_completion/google_model_garden_meta_chat_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "meta", + "model_id": "meta/llama-3.3-70b-instruct-maas", + "service_account_json": "service-account-json", + "streaming_url": "https://url/openapi/chat/completions" + } + } From 8c9c4ef49b54bfc4226ae38309b5f609eee92b69 Mon Sep 17 00:00:00 2001 From: Jan Kazlouski Date: Fri, 24 Oct 2025 11:52:11 +0000 Subject: [PATCH 2/4] Add chat_completion and completion task examples for various Google Model Garden providers with single and separate streaming URLs --- output/openapi/elasticsearch-openapi.json | 44 +++++++++++++--- .../elasticsearch-serverless-openapi.json | 44 +++++++++++++--- output/schema/schema.json | 50 ++++++++++++++++--- .../PutGoogleVertexAiRequestExample10.yaml | 14 ++++++ .../PutGoogleVertexAiRequestExample11.yaml | 14 ++++++ .../PutGoogleVertexAiRequestExample12.yaml | 13 +++++ .../PutGoogleVertexAiRequestExample3.yaml | 4 +- .../PutGoogleVertexAiRequestExample4.yaml | 5 +- .../PutGoogleVertexAiRequestExample5.yaml | 4 +- .../PutGoogleVertexAiRequestExample7.yaml | 13 +++++ .../PutGoogleVertexAiRequestExample8.yaml | 13 +++++ .../PutGoogleVertexAiRequestExample9.yaml | 15 ++++++ 12 files changed, 205 insertions(+), 28 deletions(-) create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 174d8158e4..b038c6b679 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -22746,30 +22746,60 @@ "description": "Run `PUT _inference/text_embedding/google_vertex_ai_embeddings` to create an inference endpoint to perform a `text_embedding` 
task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" }, + "PutGoogleVertexAiRequestExample10": { + "summary": "A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample11": { + "summary": "A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample12": { + "summary": "A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, "PutGoogleVertexAiRequestExample2": { "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" }, "PutGoogleVertexAiRequestExample3": { - "summary": "A completion task for Google Model Garden Anthropic endpoint", - "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "summary": "A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": 
\"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample4": { - "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", - "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample5": { - "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", - "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample6": { "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample7": { + "summary": "A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden 
with single URL provided for both streaming and non-streaming tasks.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample8": { + "summary": "A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample9": { + "summary": "A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" } } } diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 4126e46f68..4924a1db9b 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -13728,30 +13728,60 @@ "description": "Run `PUT _inference/text_embedding/google_vertex_ai_embeddings` to create an inference endpoint to perform a `text_embedding` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" }, + "PutGoogleVertexAiRequestExample10": { + "summary": "A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample11": { + "summary": "A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": 
{\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample12": { + "summary": "A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, "PutGoogleVertexAiRequestExample2": { "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" }, "PutGoogleVertexAiRequestExample3": { - "summary": "A completion task for Google Model Garden Anthropic endpoint", - "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "summary": "A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample4": { - "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", - "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 
128\n }\n}" }, "PutGoogleVertexAiRequestExample5": { - "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", - "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample6": { "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample7": { + "summary": "A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample8": { + "summary": "A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample9": { + "summary": "A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "value": "{\n \"service\": 
\"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" } } } diff --git a/output/schema/schema.json b/output/schema/schema.json index 285d8bd8a8..9b86d2519b 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -180441,6 +180441,24 @@ "summary": "A text embedding task", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" }, + "PutGoogleVertexAiRequestExample10": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden with single streaming URL provided.", + "method_request": "PUT _inference/chat_completion/google_model_garden_mistral_chat_completion", + "summary": "A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample11": { + "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "method_request": "PUT _inference/completion/google_model_garden_ai21_completion", + "summary": "A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample12": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided.", + "method_request": "PUT _inference/chat_completion/google_model_garden_ai21_chat_completion", + "summary": "A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, "PutGoogleVertexAiRequestExample2": { "alternatives": [ { @@ -180470,21 +180488,21 @@ "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" }, "PutGoogleVertexAiRequestExample3": { - "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "description": "Run `PUT 
_inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", "method_request": "PUT _inference/completion/google_model_garden_anthropic_completion", - "summary": "A completion task for Google Model Garden Anthropic endpoint", + "summary": "A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample4": { - "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided.", "method_request": "PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion", - "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample5": { - "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", "method_request": "PUT _inference/completion/google_model_garden_meta_completion", - "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample6": { @@ -180492,6 +180510,24 @@ "method_request": "PUT 
_inference/chat_completion/google_model_garden_meta_chat_completion", "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample7": { + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", + "method_request": "PUT _inference/completion/google_model_garden_hugging_face_completion", + "summary": "A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample8": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided.", + "method_request": "PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion", + "summary": "A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample9": { + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "method_request": "PUT _inference/completion/google_model_garden_mistral_completion", + "summary": "A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" } }, "inherits": { diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml new file mode 100644 index 0000000000..ad2ed0183a --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml @@ -0,0 +1,14 @@ +summary: A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided +description: Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on 
Google Model Garden with single streaming URL provided. +method_request: 'PUT _inference/chat_completion/google_model_garden_mistral_chat_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "mistral", + "model_id": "mistral-small-2503", + "service_account_json": "service-account-json", + "streaming_url": "https://url:streamRawPredict" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml new file mode 100644 index 0000000000..00a076c727 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml @@ -0,0 +1,14 @@ +summary: A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks +description: Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. +method_request: 'PUT _inference/completion/google_model_garden_ai21_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "ai21", + "service_account_json": "service-account-json", + "url": "https://url:rawPredict", + "streaming_url": "https://url:streamRawPredict" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml new file mode 100644 index 0000000000..ceadc43079 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml @@ -0,0 +1,13 @@ +summary: A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided +description: Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided. +method_request: 'PUT _inference/chat_completion/google_model_garden_ai21_chat_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "ai21", + "service_account_json": "service-account-json", + "streaming_url": "https://url:streamRawPredict" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml index 5cb79753dc..224eec318a 100644 --- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml @@ -1,5 +1,5 @@ -summary: A completion task for Google Model Garden Anthropic endpoint -description: Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden. 
+summary: A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks +description: Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. method_request: 'PUT _inference/completion/google_model_garden_anthropic_completion' # type: "request" value: |- diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml index 52b7ececd8..a8ae043de1 100644 --- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml @@ -1,5 +1,5 @@ -summary: A chat_completion task for Google Model Garden Anthropic endpoint -description: Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden. +summary: A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided +description: Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided. method_request: 'PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion' # type: "request" value: |- @@ -8,7 +8,6 @@ value: |- "service_settings": { "provider": "anthropic", "service_account_json": "service-account-json", - "url": "https://url:rawPredict", "streaming_url": "https://streaming_url:streamRawPredict" }, "task_settings": { diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml index 750fc0eaf7..19f4ce4de5 100644 --- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml @@ -1,5 +1,5 @@ -summary: A completion task for Google Model Garden Meta endpoint with single URL provided -description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided. +summary: A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks +description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks. 
method_request: 'PUT _inference/completion/google_model_garden_meta_completion' # type: "request" value: |- diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml new file mode 100644 index 0000000000..e503c99756 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml @@ -0,0 +1,13 @@ +summary: A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks +description: Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks. +method_request: 'PUT _inference/completion/google_model_garden_hugging_face_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "hugging_face", + "service_account_json": "service-account-json", + "url": "https://url/chat/completions" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml new file mode 100644 index 0000000000..59c9c14010 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml @@ -0,0 +1,13 @@ +summary: A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided +description: Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided. +method_request: 'PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "hugging_face", + "service_account_json": "service-account-json", + "streaming_url": "https://url/chat/completions" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml new file mode 100644 index 0000000000..17c69126b5 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml @@ -0,0 +1,15 @@ +summary: A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks +description: Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. 
+method_request: 'PUT _inference/completion/google_model_garden_mistral_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "mistral", + "model_id": "mistral-small-2503", + "service_account_json": "service-account-json", + "url": "https://url:rawPredict", + "streaming_url": "https://url:streamRawPredict" + } + } From 918321e02be6e220f7d9908d4055a56630194d8e Mon Sep 17 00:00:00 2001 From: Jan Kazlouski Date: Fri, 24 Oct 2025 12:08:27 +0000 Subject: [PATCH 3/4] Update CommonTypes.ts to clarify URL requirements for various providers --- output/openapi/elasticsearch-openapi.json | 4 +- .../elasticsearch-serverless-openapi.json | 4 +- output/schema/schema.json | 72 +++++++++---------- specification/inference/_types/CommonTypes.ts | 2 + 4 files changed, 42 insertions(+), 40 deletions(-) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index b038c6b679..8cbb2651b7 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -103017,11 +103017,11 @@ ] }, "url": { - "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). 
Others support both operation types through a single URL (e.g., Meta, Hugging Face).", "type": "string" }, "streaming_url": { - "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", "type": "string" }, "location": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 4924a1db9b..32d051c61b 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -67097,11 +67097,11 @@ ] }, "url": { - "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). 
Others support both operation types through a single URL (e.g., Meta, Hugging Face).", "type": "string" }, "streaming_url": { - "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", "type": "string" }, "location": { diff --git a/output/schema/schema.json b/output/schema/schema.json index 9b86d2519b..49ba042117 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -173344,7 +173344,7 @@ "name": "GoogleModelGardenProvider", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1498-L1505" + "specLocation": "inference/_types/CommonTypes.ts#L1500-L1507" }, { "kind": "interface", @@ -173366,7 +173366,7 @@ } }, { - "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). 
Others support both operation types through a single URL (e.g., Meta, Hugging Face).", "name": "url", "required": false, "type": { @@ -173378,7 +173378,7 @@ } }, { - "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", "name": "streaming_url", "required": false, "type": { @@ -173468,7 +173468,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1433-L1496" + "specLocation": "inference/_types/CommonTypes.ts#L1433-L1498" }, { "kind": "enum", @@ -173481,7 +173481,7 @@ "name": "GoogleVertexAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1546-L1548" + "specLocation": "inference/_types/CommonTypes.ts#L1548-L1550" }, { "kind": "interface", @@ -173543,7 +173543,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1507-L1530" + "specLocation": "inference/_types/CommonTypes.ts#L1509-L1532" }, { "kind": "enum", @@ -173565,7 +173565,7 @@ "name": "GoogleVertexAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1539-L1544" + "specLocation": "inference/_types/CommonTypes.ts#L1541-L1546" }, { "kind": "interface", @@ -173627,7 +173627,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1550-L1582" + "specLocation": "inference/_types/CommonTypes.ts#L1552-L1584" }, { "kind": "enum", @@ -173640,7 +173640,7 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1603-L1605" + "specLocation": "inference/_types/CommonTypes.ts#L1605-L1607" }, { "kind": "interface", @@ -173674,7 +173674,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1584-L1594" + "specLocation": "inference/_types/CommonTypes.ts#L1586-L1596" }, { "kind": "enum", @@ -173696,7 +173696,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1596-L1601" + "specLocation": "inference/_types/CommonTypes.ts#L1598-L1603" }, { "kind": "interface", @@ -174928,7 +174928,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1607-L1636" + "specLocation": "inference/_types/CommonTypes.ts#L1609-L1638" }, { "kind": "enum", @@ -174941,7 +174941,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1666-L1668" + "specLocation": 
"inference/_types/CommonTypes.ts#L1668-L1670" }, { "kind": "enum", @@ -174960,7 +174960,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1670-L1674" + "specLocation": "inference/_types/CommonTypes.ts#L1672-L1676" }, { "kind": "interface", @@ -175006,7 +175006,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1638-L1659" + "specLocation": "inference/_types/CommonTypes.ts#L1640-L1661" }, { "kind": "enum", @@ -175022,7 +175022,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1661-L1664" + "specLocation": "inference/_types/CommonTypes.ts#L1663-L1666" }, { "kind": "enum", @@ -175044,7 +175044,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1676-L1681" + "specLocation": "inference/_types/CommonTypes.ts#L1678-L1683" }, { "kind": "interface", @@ -175116,7 +175116,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1683-L1713" + "specLocation": "inference/_types/CommonTypes.ts#L1685-L1715" }, { "kind": "enum", @@ -175129,7 +175129,7 @@ "name": "LlamaServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1721-L1723" + "specLocation": "inference/_types/CommonTypes.ts#L1723-L1725" }, { "kind": "enum", @@ -175148,7 +175148,7 @@ "name": "LlamaSimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1725-L1729" + "specLocation": "inference/_types/CommonTypes.ts#L1727-L1731" }, { "kind": "enum", @@ -175167,7 +175167,7 @@ "name": "LlamaTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1715-L1719" + "specLocation": "inference/_types/CommonTypes.ts#L1717-L1721" }, { "kind": "interface", @@ -175325,7 +175325,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1731-L1758" + "specLocation": "inference/_types/CommonTypes.ts#L1733-L1760" }, { "kind": "enum", @@ -175338,7 +175338,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1766-L1768" + "specLocation": "inference/_types/CommonTypes.ts#L1768-L1770" }, { "kind": "enum", @@ -175357,7 +175357,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1760-L1764" + "specLocation": "inference/_types/CommonTypes.ts#L1762-L1766" }, { "kind": "interface", @@ -175444,7 +175444,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1770-L1812" + "specLocation": "inference/_types/CommonTypes.ts#L1772-L1814" }, { "kind": "enum", @@ -175457,7 +175457,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1839-L1841" + "specLocation": "inference/_types/CommonTypes.ts#L1841-L1843" }, { "kind": "interface", @@ -175487,7 +175487,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1814-L1831" + "specLocation": "inference/_types/CommonTypes.ts#L1816-L1833" }, { "kind": "enum", @@ -175506,7 +175506,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1833-L1837" + "specLocation": "inference/_types/CommonTypes.ts#L1835-L1839" }, { "kind": "interface", @@ -176352,7 +176352,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1532-L1537" + "specLocation": 
"inference/_types/CommonTypes.ts#L1534-L1539" }, { "kind": "interface", @@ -176498,7 +176498,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1843-L1874" + "specLocation": "inference/_types/CommonTypes.ts#L1845-L1876" }, { "kind": "enum", @@ -176511,7 +176511,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1907-L1909" + "specLocation": "inference/_types/CommonTypes.ts#L1909-L1911" }, { "kind": "interface", @@ -176571,7 +176571,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1876-L1900" + "specLocation": "inference/_types/CommonTypes.ts#L1878-L1902" }, { "kind": "enum", @@ -176587,7 +176587,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1902-L1905" + "specLocation": "inference/_types/CommonTypes.ts#L1904-L1907" }, { "kind": "interface", @@ -176675,7 +176675,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1911-L1949" + "specLocation": "inference/_types/CommonTypes.ts#L1913-L1951" }, { "kind": "enum", @@ -176688,7 +176688,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1957-L1959" + "specLocation": "inference/_types/CommonTypes.ts#L1959-L1961" }, { "kind": "enum", @@ -176707,7 +176707,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1951-L1955" + "specLocation": "inference/_types/CommonTypes.ts#L1953-L1957" }, { "kind": "request", diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 91cb66d192..645cc3d275 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -1445,6 +1445,7 @@ export class GoogleVertexAIServiceSettings { * If `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`. * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`). * At least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage. + * Certain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face). */ url?: string /** @@ -1453,6 +1454,7 @@ export class GoogleVertexAIServiceSettings { * If `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests. * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`). * At least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage. + * Certain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face). 
*/ streaming_url?: string /** From 2c4bcb59c42b30f51d3a2d5dbd9071404838c249 Mon Sep 17 00:00:00 2001 From: Jan Kazlouski Date: Wed, 29 Oct 2025 12:22:26 +0000 Subject: [PATCH 4/4] Add examples for chat_completion and completion tasks using various Google Model Garden providers with updated URL formats --- output/openapi/elasticsearch-openapi.json | 114 +++++++--- .../elasticsearch-serverless-openapi.json | 114 +++++++--- output/schema/schema.json | 204 +++++++++++------- specification/inference/_types/CommonTypes.ts | 2 + .../PutGoogleVertexAiRequestExample10.yaml | 11 +- .../PutGoogleVertexAiRequestExample11.yaml | 11 +- .../PutGoogleVertexAiRequestExample12.yaml | 10 +- .../PutGoogleVertexAiRequestExample13.yaml | 13 ++ .../PutGoogleVertexAiRequestExample14.yaml | 13 ++ .../PutGoogleVertexAiRequestExample15.yaml | 15 ++ .../PutGoogleVertexAiRequestExample16.yaml | 14 ++ .../PutGoogleVertexAiRequestExample17.yaml | 13 ++ .../PutGoogleVertexAiRequestExample18.yaml | 13 ++ .../PutGoogleVertexAiRequestExample19.yaml | 13 ++ .../PutGoogleVertexAiRequestExample20.yaml | 13 ++ .../PutGoogleVertexAiRequestExample21.yaml | 14 ++ .../PutGoogleVertexAiRequestExample22.yaml | 13 ++ .../PutGoogleVertexAiRequestExample3.yaml | 8 +- .../PutGoogleVertexAiRequestExample4.yaml | 6 +- .../PutGoogleVertexAiRequestExample5.yaml | 6 +- .../PutGoogleVertexAiRequestExample6.yaml | 6 +- .../PutGoogleVertexAiRequestExample7.yaml | 10 +- .../PutGoogleVertexAiRequestExample8.yaml | 10 +- .../PutGoogleVertexAiRequestExample9.yaml | 12 +- 24 files changed, 475 insertions(+), 183 deletions(-) create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample13.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample14.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample15.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample16.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample17.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample18.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample19.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample20.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample21.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample22.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 92eba80f75..f0fa323701 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -22777,59 +22777,109 @@ "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" }, "PutGoogleVertexAiRequestExample10": { - "summary": "A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to 
perform a `chat_completion` task using Mistral's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "summary": "A chat_completion task for Google Model Garden Meta shared endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample11": { - "summary": "A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "summary": "A completion task for Google Model Garden Hugging Face dedicated endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample12": { - "summary": "A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "summary": "A chat_completion task for Google Model Garden Hugging Face dedicated endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample13": { + "summary": "A completion task for Google Model Garden Hugging Face shared endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample14": { + "summary": "A chat_completion task for Google Model Garden Hugging Face shared endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single streaming URL provided. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample15": { + "summary": "A completion task for Google Model Garden Mistral serverless endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. See the Mistral model documentation for instructions on how to construct URLs.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:rawPredict\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample16": { + "summary": "A chat_completion task for Google Model Garden Mistral serverless endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's serverless model hosted on Google Model Garden with single streaming URL provided. See the Mistral model documentation for instructions on how to construct the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample17": { + "summary": "A completion task for Google Model Garden Mistral dedicated endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample18": { + "summary": "A chat_completion task for Google Model Garden Mistral dedicated endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample19": { + "summary": "A completion task for Google Model Garden Mistral shared endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample2": { "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" }, + "PutGoogleVertexAiRequestExample20": { + "summary": "A chat_completion task for Google Model Garden Mistral shared endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single streaming URL provided. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample21": { + "summary": "A completion task for Google Model Garden AI21 serverless endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden serverless endpoint with separate URLs for streaming and non-streaming tasks. See the AI21 model documentation for instructions on how to construct URLs.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:rawPredict\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample22": { + "summary": "A chat_completion task for Google Model Garden AI21 serverless endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden serverless endpoint with single streaming URL provided. See the AI21 model documentation for instructions on how to construct URLs.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, "PutGoogleVertexAiRequestExample3": { - "summary": "A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A completion task for Google Model Garden Anthropic serverless endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. 
See the Anthropic model documentation for instructions on how to construct URLs.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:rawPredict\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample4": { - "summary": "A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A chat_completion task for Google Model Garden Anthropic serverless endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's serverless model hosted on Google Model Garden with single streaming URL provided. See the Anthropic model documentation for instructions on how to construct the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample5": { - "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + "summary": "A completion task for Google Model Garden Meta serverless endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's serverless model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks. 
See the Meta model documentation for instructions on how to construct the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/openapi/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample6": { - "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" + "summary": "A chat_completion task for Google Model Garden Meta serverless endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's serverless model hosted on Google Model Garden with single streaming URL provided. See the Meta model documentation for instructions on how to construct the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/openapi/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample7": { - "summary": "A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/chat/completions\"\n }\n}" + "summary": "A completion task for Google Model Garden Meta dedicated endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-fasttryout.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample8": { - "summary": "A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/chat/completions\"\n }\n}" + "summary": "A chat_completion task for Google Model Garden Meta dedicated endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-fasttryout.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample9": { - "summary": "A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "summary": "A completion task for Google Model Garden Meta shared endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" } } } @@ -103077,11 +103127,11 @@ ] }, "url": { - "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).\nInformation on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint’s `Sample request` page. The request examples also illustrate the proper formatting for the `url`.", "type": "string" }, "streaming_url": { - "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). 
Others support both operation types through a single URL (e.g., Meta, Hugging Face).\nInformation on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint’s `Sample request` page. The request examples also illustrate the proper formatting for the `streaming_url`.", "type": "string" }, "location": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index f8a68eeab1..7c98738466 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -13729,59 +13729,109 @@ "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" }, "PutGoogleVertexAiRequestExample10": { - "summary": "A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "summary": "A chat_completion task for Google Model Garden Meta shared endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample11": { - "summary": "A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "summary": "A completion task for Google Model Garden Hugging Face dedicated endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample12": { - "summary": "A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "summary": "A chat_completion task for Google Model Garden Hugging Face dedicated endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample13": { + "summary": "A completion task for Google Model Garden Hugging Face shared endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample14": { + "summary": "A chat_completion task for Google Model Garden Hugging Face shared endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single streaming URL provided. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample15": { + "summary": "A completion task for Google Model Garden Mistral serverless endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. See the Mistral model documentation for instructions on how to construct URLs.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:rawPredict\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample16": { + "summary": "A chat_completion task for Google Model Garden Mistral serverless endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's serverless model hosted on Google Model Garden with single streaming URL provided. See the Mistral model documentation for instructions on how to construct the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample17": { + "summary": "A completion task for Google Model Garden Mistral dedicated endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample18": { + "summary": "A chat_completion task for Google Model Garden Mistral dedicated endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample19": { + "summary": "A completion task for Google Model Garden Mistral shared endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample2": { "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" }, + "PutGoogleVertexAiRequestExample20": { + "summary": "A chat_completion task for Google Model Garden Mistral shared endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single streaming URL provided. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample21": { + "summary": "A completion task for Google Model Garden AI21 serverless endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden serverless endpoint with separate URLs for streaming and non-streaming tasks. See the AI21 model documentation for instructions on how to construct URLs.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:rawPredict\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample22": { + "summary": "A chat_completion task for Google Model Garden AI21 serverless endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden serverless endpoint with single streaming URL provided. See the AI21 model documentation for instructions on how to construct URLs.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, "PutGoogleVertexAiRequestExample3": { - "summary": "A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A completion task for Google Model Garden Anthropic serverless endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. 
See the Anthropic model documentation for instructions on how to construct URLs.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:rawPredict\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample4": { - "summary": "A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A chat_completion task for Google Model Garden Anthropic serverless endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's serverless model hosted on Google Model Garden with single streaming URL provided. See the Anthropic model documentation for instructions on how to construct the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample5": { - "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + "summary": "A completion task for Google Model Garden Meta serverless endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's serverless model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks. 
See the Meta model documentation for instructions on how to construct the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/openapi/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample6": { - "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" + "summary": "A chat_completion task for Google Model Garden Meta serverless endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's serverless model hosted on Google Model Garden with single streaming URL provided. See the Meta model documentation for instructions on how to construct the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/openapi/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample7": { - "summary": "A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/chat/completions\"\n }\n}" + "summary": "A completion task for Google Model Garden Meta dedicated endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-fasttryout.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample8": { - "summary": "A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/chat/completions\"\n }\n}" + "summary": "A chat_completion task for Google Model Garden Meta dedicated endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-fasttryout.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample9": { - "summary": "A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "summary": "A completion task for Google Model Garden Meta shared endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" } } } @@ -67129,11 +67179,11 @@ ] }, "url": { - "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).\nInformation on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint’s `Sample request` page. The request examples also illustrate the proper formatting for the `url`.", "type": "string" }, "streaming_url": { - "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). 
Others support both operation types through a single URL (e.g., Meta, Hugging Face).\nInformation on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint’s `Sample request` page. The request examples also illustrate the proper formatting for the `streaming_url`.", "type": "string" }, "location": { diff --git a/output/schema/schema.json b/output/schema/schema.json index c5728277c2..43abda361c 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -173498,7 +173498,7 @@ "name": "GoogleModelGardenProvider", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1500-L1507" + "specLocation": "inference/_types/CommonTypes.ts#L1502-L1509" }, { "kind": "interface", @@ -173520,7 +173520,7 @@ } }, { - "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).\nInformation on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint’s `Sample request` page. The request examples also illustrate the proper formatting for the `url`.", "name": "url", "required": false, "type": { @@ -173532,7 +173532,7 @@ } }, { - "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). 
Others support both operation types through a single URL (e.g., Meta, Hugging Face).", + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).\nInformation on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint’s `Sample request` page. The request examples also illustrate the proper formatting for the `streaming_url`.", "name": "streaming_url", "required": false, "type": { @@ -173622,7 +173622,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1433-L1498" + "specLocation": "inference/_types/CommonTypes.ts#L1433-L1500" }, { "kind": "enum", @@ -173635,7 +173635,7 @@ "name": "GoogleVertexAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1548-L1550" + "specLocation": "inference/_types/CommonTypes.ts#L1550-L1552" }, { "kind": "interface", @@ -173697,7 +173697,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1509-L1532" + "specLocation": "inference/_types/CommonTypes.ts#L1511-L1534" }, { "kind": "enum", @@ -173719,7 +173719,7 @@ "name": "GoogleVertexAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1541-L1546" + "specLocation": "inference/_types/CommonTypes.ts#L1543-L1548" }, { "kind": "interface", @@ -173781,7 +173781,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1552-L1584" + "specLocation": "inference/_types/CommonTypes.ts#L1554-L1586" }, { "kind": "enum", @@ -173794,7 +173794,7 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1605-L1607" + "specLocation": "inference/_types/CommonTypes.ts#L1607-L1609" }, { "kind": "interface", @@ -173828,7 +173828,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1586-L1596" + "specLocation": "inference/_types/CommonTypes.ts#L1588-L1598" }, { "kind": "enum", @@ -173850,7 +173850,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1598-L1603" + "specLocation": "inference/_types/CommonTypes.ts#L1600-L1605" }, { "kind": "interface", @@ -175082,7 +175082,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1609-L1638" + "specLocation": "inference/_types/CommonTypes.ts#L1611-L1640" }, { "kind": "enum", @@ -175095,7 +175095,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1668-L1670" + "specLocation": "inference/_types/CommonTypes.ts#L1670-L1672" }, { "kind": "enum", @@ -175114,7 +175114,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1672-L1676" + "specLocation": "inference/_types/CommonTypes.ts#L1674-L1678" }, { "kind": "interface", @@ -175160,7 +175160,7 @@ } } ], - 
"specLocation": "inference/_types/CommonTypes.ts#L1640-L1661" + "specLocation": "inference/_types/CommonTypes.ts#L1642-L1663" }, { "kind": "enum", @@ -175176,7 +175176,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1663-L1666" + "specLocation": "inference/_types/CommonTypes.ts#L1665-L1668" }, { "kind": "enum", @@ -175198,7 +175198,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1678-L1683" + "specLocation": "inference/_types/CommonTypes.ts#L1680-L1685" }, { "kind": "interface", @@ -175270,7 +175270,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1685-L1715" + "specLocation": "inference/_types/CommonTypes.ts#L1687-L1717" }, { "kind": "enum", @@ -175283,7 +175283,7 @@ "name": "LlamaServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1723-L1725" + "specLocation": "inference/_types/CommonTypes.ts#L1725-L1727" }, { "kind": "enum", @@ -175302,7 +175302,7 @@ "name": "LlamaSimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1727-L1731" + "specLocation": "inference/_types/CommonTypes.ts#L1729-L1733" }, { "kind": "enum", @@ -175321,7 +175321,7 @@ "name": "LlamaTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1717-L1721" + "specLocation": "inference/_types/CommonTypes.ts#L1719-L1723" }, { "kind": "interface", @@ -175479,7 +175479,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1733-L1760" + "specLocation": "inference/_types/CommonTypes.ts#L1735-L1762" }, { "kind": "enum", @@ -175492,7 +175492,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1768-L1770" + "specLocation": "inference/_types/CommonTypes.ts#L1770-L1772" }, { "kind": "enum", @@ -175511,7 +175511,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1762-L1766" + "specLocation": "inference/_types/CommonTypes.ts#L1764-L1768" }, { "kind": "interface", @@ -175598,7 +175598,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1772-L1814" + "specLocation": "inference/_types/CommonTypes.ts#L1774-L1816" }, { "kind": "enum", @@ -175611,7 +175611,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1841-L1843" + "specLocation": "inference/_types/CommonTypes.ts#L1843-L1845" }, { "kind": "interface", @@ -175641,7 +175641,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1816-L1833" + "specLocation": "inference/_types/CommonTypes.ts#L1818-L1835" }, { "kind": "enum", @@ -175660,7 +175660,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1835-L1839" + "specLocation": "inference/_types/CommonTypes.ts#L1837-L1841" }, { "kind": "interface", @@ -176506,7 +176506,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1534-L1539" + "specLocation": "inference/_types/CommonTypes.ts#L1536-L1541" }, { "kind": "interface", @@ -176652,7 +176652,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1845-L1876" + "specLocation": "inference/_types/CommonTypes.ts#L1847-L1878" }, { "kind": "enum", @@ -176665,7 +176665,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": 
"inference/_types/CommonTypes.ts#L1909-L1911" + "specLocation": "inference/_types/CommonTypes.ts#L1911-L1913" }, { "kind": "interface", @@ -176725,7 +176725,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1878-L1902" + "specLocation": "inference/_types/CommonTypes.ts#L1880-L1904" }, { "kind": "enum", @@ -176741,7 +176741,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1904-L1907" + "specLocation": "inference/_types/CommonTypes.ts#L1906-L1909" }, { "kind": "interface", @@ -176829,7 +176829,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1913-L1951" + "specLocation": "inference/_types/CommonTypes.ts#L1915-L1953" }, { "kind": "enum", @@ -176842,7 +176842,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1959-L1961" + "specLocation": "inference/_types/CommonTypes.ts#L1961-L1963" }, { "kind": "enum", @@ -176861,7 +176861,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1953-L1957" + "specLocation": "inference/_types/CommonTypes.ts#L1955-L1959" }, { "kind": "request", @@ -180596,22 +180596,64 @@ "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" }, "PutGoogleVertexAiRequestExample10": { - "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden with single streaming URL provided.", - "method_request": "PUT _inference/chat_completion/google_model_garden_mistral_chat_completion", - "summary": "A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden shared endpoint with single streaming URL provided. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "method_request": "PUT _inference/chat_completion/google_model_garden_meta_chat_completion", + "summary": "A chat_completion task for Google Model Garden Meta shared endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample11": { - "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", - "method_request": "PUT _inference/completion/google_model_garden_ai21_completion", - "summary": "A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.", + "method_request": "PUT _inference/completion/google_model_garden_hugging_face_completion", + "summary": "A completion task for Google Model Garden Hugging Face dedicated endpoint with single URL provided for both streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample12": { - "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided.", - "method_request": "PUT _inference/chat_completion/google_model_garden_ai21_chat_completion", - "summary": "A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "method_request": "PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion", + "summary": "A chat_completion task for Google Model Garden Hugging Face dedicated endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample13": { + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.", + "method_request": "PUT _inference/completion/google_model_garden_hugging_face_completion", + "summary": "A completion task for Google Model Garden Hugging Face shared endpoint with single URL provided for both streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample14": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "method_request": "PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion", + "summary": "A chat_completion task for Google Model Garden Hugging Face shared endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample15": { + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. 
See the Mistral model documentation for instructions on how to construct URLs.", + "method_request": "PUT _inference/completion/google_model_garden_mistral_completion", + "summary": "A completion task for Google Model Garden Mistral serverless endpoint with separate URLs for streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:rawPredict\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample16": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's serverless model hosted on Google Model Garden with single streaming URL provided. See the Mistral model documentation for instructions on how to construct the URL.", + "method_request": "PUT _inference/chat_completion/google_model_garden_mistral_chat_completion", + "summary": "A chat_completion task for Google Model Garden Mistral serverless endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample17": { + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.", + "method_request": "PUT _inference/completion/google_model_garden_mistral_completion", + "summary": "A completion task for Google Model Garden Mistral dedicated endpoint with single URL provided for both streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample18": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "method_request": "PUT _inference/chat_completion/google_model_garden_mistral_chat_completion", + "summary": "A chat_completion task for Google Model Garden Mistral dedicated endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample19": { + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.", + "method_request": "PUT _inference/completion/google_model_garden_mistral_completion", + "summary": "A completion task for Google Model Garden Mistral shared endpoint with single URL provided for both streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample2": { "alternatives": [ @@ -180641,47 +180683,65 @@ "summary": "A rerank task", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" }, + "PutGoogleVertexAiRequestExample20": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "method_request": "PUT _inference/chat_completion/google_model_garden_mistral_chat_completion", + "summary": "A chat_completion task for Google Model Garden Mistral shared endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample21": { + "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden serverless endpoint with separate URLs for streaming and non-streaming tasks. 
See the AI21 model documentation for instructions on how to construct URLs.", + "method_request": "PUT _inference/completion/google_model_garden_ai21_completion", + "summary": "A completion task for Google Model Garden AI21 serverless endpoint with separate URLs for streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:rawPredict\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample22": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden serverless endpoint with single streaming URL provided. See the AI21 model documentation for instructions on how to construct URLs.", + "method_request": "PUT _inference/chat_completion/google_model_garden_ai21_chat_completion", + "summary": "A chat_completion task for Google Model Garden AI21 serverless endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, "PutGoogleVertexAiRequestExample3": { - "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. 
See the Anthropic model documentation for instructions on how to construct URLs.", "method_request": "PUT _inference/completion/google_model_garden_anthropic_completion", - "summary": "A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A completion task for Google Model Garden Anthropic serverless endpoint with separate URLs for streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:rawPredict\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample4": { - "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided.", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's serverless model hosted on Google Model Garden with single streaming URL provided. 
See the Anthropic model documentation for instructions on how to construct the URL.", "method_request": "PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion", - "summary": "A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A chat_completion task for Google Model Garden Anthropic serverless endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample5": { - "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's serverless model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks. See the Meta model documentation for instructions on how to construct the URL.", "method_request": "PUT _inference/completion/google_model_garden_meta_completion", - "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + "summary": "A completion task for Google Model Garden Meta serverless endpoint with single URL provided for both streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/openapi/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample6": { - "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's serverless model hosted on Google Model Garden with single streaming URL provided. 
See the Meta model documentation for instructions on how to construct the URL.",
           "method_request": "PUT _inference/chat_completion/google_model_garden_meta_chat_completion",
-          "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided",
-          "value": "{\n  \"service\": \"googlevertexai\",\n  \"service_settings\": {\n    \"provider\": \"meta\",\n    \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n    \"service_account_json\": \"service-account-json\",\n    \"streaming_url\": \"https://url/openapi/chat/completions\"\n  }\n}"
+          "summary": "A chat_completion task for Google Model Garden Meta serverless endpoint with single streaming URL provided",
+          "value": "{\n  \"service\": \"googlevertexai\",\n  \"service_settings\": {\n    \"provider\": \"meta\",\n    \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n    \"service_account_json\": \"service-account-json\",\n    \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/openapi/chat/completions\"\n  }\n}"
         },
         "PutGoogleVertexAiRequestExample7": {
-          "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.",
-          "method_request": "PUT _inference/completion/google_model_garden_hugging_face_completion",
-          "summary": "A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks",
-          "value": "{\n  \"service\": \"googlevertexai\",\n  \"service_settings\": {\n    \"provider\": \"hugging_face\",\n    \"service_account_json\": \"service-account-json\",\n    \"url\": \"https://url/chat/completions\"\n  }\n}"
+          "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.",
+          "method_request": "PUT _inference/completion/google_model_garden_meta_completion",
+          "summary": "A completion task for Google Model Garden Meta dedicated endpoint with single URL provided for both streaming and non-streaming tasks",
+          "value": "{\n  \"service\": \"googlevertexai\",\n  \"service_settings\": {\n    \"provider\": \"meta\",\n    \"service_account_json\": \"service-account-json\",\n    \"url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-fasttryout.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n  }\n}"
         },
         "PutGoogleVertexAiRequestExample8": {
-          "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided.",
-          "method_request": "PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion",
-          "summary": "A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided",
-          "value": "{\n  \"service\": \"googlevertexai\",\n  \"service_settings\": {\n    \"provider\": \"hugging_face\",\n    \"service_account_json\": \"service-account-json\",\n    \"streaming_url\": \"https://url/chat/completions\"\n  }\n}"
+          "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.",
+          "method_request": "PUT _inference/chat_completion/google_model_garden_meta_chat_completion",
+          "summary": "A chat_completion task for Google Model Garden Meta dedicated endpoint with single streaming URL provided",
+          "value": "{\n  \"service\": \"googlevertexai\",\n  \"service_settings\": {\n    \"provider\": \"meta\",\n    \"service_account_json\": \"service-account-json\",\n    \"streaming_url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-fasttryout.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n  }\n}"
         },
         "PutGoogleVertexAiRequestExample9": {
-          "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.",
-          "method_request": "PUT _inference/completion/google_model_garden_mistral_completion",
-          "summary": "A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks",
-          "value": "{\n  \"service\": \"googlevertexai\",\n  \"service_settings\": {\n    \"provider\": \"mistral\",\n    \"model_id\": \"mistral-small-2503\",\n    \"service_account_json\": \"service-account-json\",\n    \"url\": \"https://url:rawPredict\",\n    \"streaming_url\": \"https://url:streamRawPredict\"\n  }\n}"
+          "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.",
+          "method_request": "PUT _inference/completion/google_model_garden_meta_completion",
+          "summary": "A completion task for Google Model Garden Meta shared endpoint with single URL provided for both streaming and non-streaming tasks",
+          "value": "{\n  \"service\": \"googlevertexai\",\n  \"service_settings\": {\n    \"provider\": \"meta\",\n    \"service_account_json\": \"service-account-json\",\n    \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n  }\n}"
         }
       },
       "inherits": {
diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts
index d404b62dc0..6dc52f2cef 100644
--- a/specification/inference/_types/CommonTypes.ts
+++ b/specification/inference/_types/CommonTypes.ts
@@ -1446,6 +1446,7 @@ export class GoogleVertexAIServiceSettings {
    * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).
    * At least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.
    * Certain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).
+   * Information on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint's `Sample request` page. The request examples also illustrate the proper formatting for the `url`.
    */
   url?: string
   /**
@@ -1455,6 +1456,7 @@
    * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).
    * At least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.
    * Certain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).
+   * Information on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint's `Sample request` page. The request examples also illustrate the proper formatting for the `streaming_url`.
    */
   streaming_url?: string
   /**
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml
index ad2ed0183a..cd0e0cccea 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml
@@ -1,14 +1,13 @@
-summary: A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided
-description: Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden with single streaming URL provided.
-method_request: 'PUT _inference/chat_completion/google_model_garden_mistral_chat_completion'
+summary: A chat_completion task for Google Model Garden Meta shared endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_meta_chat_completion'
 # type: "request"
 value: |-
   {
     "service": "googlevertexai",
     "service_settings": {
-      "provider": "mistral",
-      "model_id": "mistral-small-2503",
+      "provider": "meta",
       "service_account_json": "service-account-json",
-      "streaming_url": "https://url:streamRawPredict"
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
     }
   }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml
index 00a076c727..9faa3c3b5d 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml
@@ -1,14 +1,13 @@
-summary: A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks
-description: Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.
-method_request: 'PUT _inference/completion/google_model_garden_ai21_completion'
+summary: A completion task for Google Model Garden Hugging Face dedicated endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_hugging_face_completion'
 # type: "request"
 value: |-
   {
     "service": "googlevertexai",
     "service_settings": {
-      "provider": "ai21",
+      "provider": "hugging_face",
       "service_account_json": "service-account-json",
-      "url": "https://url:rawPredict",
-      "streaming_url": "https://url:streamRawPredict"
+      "url": "https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
     }
   }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml
index ceadc43079..f766cb7abb 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml
@@ -1,13 +1,13 @@
-summary: A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided
-description: Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided.
-method_request: 'PUT _inference/chat_completion/google_model_garden_ai21_chat_completion'
+summary: A chat_completion task for Google Model Garden Hugging Face dedicated endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion'
 # type: "request"
 value: |-
   {
     "service": "googlevertexai",
     "service_settings": {
-      "provider": "ai21",
+      "provider": "hugging_face",
       "service_account_json": "service-account-json",
-      "streaming_url": "https://url:streamRawPredict"
+      "streaming_url": "https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
     }
   }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample13.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample13.yaml
new file mode 100644
index 0000000000..e6e690f9bd
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample13.yaml
@@ -0,0 +1,13 @@
+summary: A completion task for Google Model Garden Hugging Face shared endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_hugging_face_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "hugging_face",
+      "service_account_json": "service-account-json",
+      "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample14.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample14.yaml
new file mode 100644
index 0000000000..19de6d9efb
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample14.yaml
@@ -0,0 +1,13 @@
+summary: A chat_completion task for Google Model Garden Hugging Face shared endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "hugging_face",
+      "service_account_json": "service-account-json",
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample15.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample15.yaml
new file mode 100644
index 0000000000..b49fd214db
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample15.yaml
@@ -0,0 +1,15 @@
+summary: A completion task for Google Model Garden Mistral serverless endpoint with separate URLs for streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. See the Mistral model documentation for instructions on how to construct URLs.
+method_request: 'PUT _inference/completion/google_model_garden_mistral_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "mistral",
+      "model_id": "mistral-small-2503",
+      "service_account_json": "service-account-json",
+      "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:rawPredict",
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample16.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample16.yaml
new file mode 100644
index 0000000000..fcaba9ff26
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample16.yaml
@@ -0,0 +1,14 @@
+summary: A chat_completion task for Google Model Garden Mistral serverless endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's serverless model hosted on Google Model Garden with single streaming URL provided. See the Mistral model documentation for instructions on how to construct the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_mistral_chat_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "mistral",
+      "model_id": "mistral-small-2503",
+      "service_account_json": "service-account-json",
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample17.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample17.yaml
new file mode 100644
index 0000000000..5463166837
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample17.yaml
@@ -0,0 +1,13 @@
+summary: A completion task for Google Model Garden Mistral dedicated endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_mistral_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "mistral",
+      "service_account_json": "service-account-json",
+      "url": "https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample18.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample18.yaml
new file mode 100644
index 0000000000..a749a47c80
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample18.yaml
@@ -0,0 +1,13 @@
+summary: A chat_completion task for Google Model Garden Mistral dedicated endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_mistral_chat_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "mistral",
+      "service_account_json": "service-account-json",
+      "streaming_url": "https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample19.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample19.yaml
new file mode 100644
index 0000000000..112966f9d4
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample19.yaml
@@ -0,0 +1,13 @@
+summary: A completion task for Google Model Garden Mistral shared endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_mistral_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "mistral",
+      "service_account_json": "service-account-json",
+      "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample20.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample20.yaml
new file mode 100644
index 0000000000..1bdafc14ac
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample20.yaml
@@ -0,0 +1,13 @@
+summary: A chat_completion task for Google Model Garden Mistral shared endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_mistral_chat_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "mistral",
+      "service_account_json": "service-account-json",
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample21.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample21.yaml
new file mode 100644
index 0000000000..c7dc7f0b04
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample21.yaml
@@ -0,0 +1,14 @@
+summary: A completion task for Google Model Garden AI21 serverless endpoint with separate URLs for streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden serverless endpoint with separate URLs for streaming and non-streaming tasks. See the AI21 model documentation for instructions on how to construct URLs.
+method_request: 'PUT _inference/completion/google_model_garden_ai21_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "ai21",
+      "service_account_json": "service-account-json",
+      "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:rawPredict",
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample22.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample22.yaml
new file mode 100644
index 0000000000..ef365f73dd
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample22.yaml
@@ -0,0 +1,13 @@
+summary: A chat_completion task for Google Model Garden AI21 serverless endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden serverless endpoint with single streaming URL provided. See the AI21 model documentation for instructions on how to construct URLs.
+method_request: 'PUT _inference/chat_completion/google_model_garden_ai21_chat_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "ai21",
+      "service_account_json": "service-account-json",
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml
index 224eec318a..dd4026665d 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml
@@ -1,5 +1,5 @@
-summary: A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks
-description: Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.
+summary: A completion task for Google Model Garden Anthropic serverless endpoint with separate URLs for streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. See the Anthropic model documentation for instructions on how to construct URLs.
 method_request: 'PUT _inference/completion/google_model_garden_anthropic_completion'
 # type: "request"
 value: |-
@@ -8,8 +8,8 @@ value: |-
     "service_settings": {
       "provider": "anthropic",
       "service_account_json": "service-account-json",
-      "url": "https://url:rawPredict",
-      "streaming_url": "https://streaming_url:streamRawPredict"
+      "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:rawPredict",
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict"
     },
     "task_settings": {
       "max_tokens": 128
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml
index a8ae043de1..058f60f3a2 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml
@@ -1,5 +1,5 @@
-summary: A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided
-description: Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided.
+summary: A chat_completion task for Google Model Garden Anthropic serverless endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's serverless model hosted on Google Model Garden with single streaming URL provided. See the Anthropic model documentation for instructions on how to construct the URL.
 method_request: 'PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion'
 # type: "request"
 value: |-
@@ -8,7 +8,7 @@ value: |-
     "service_settings": {
       "provider": "anthropic",
       "service_account_json": "service-account-json",
-      "streaming_url": "https://streaming_url:streamRawPredict"
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict"
     },
     "task_settings": {
       "max_tokens": 128
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml
index 19f4ce4de5..e7c86dc760 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml
@@ -1,5 +1,5 @@
-summary: A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks
-description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.
+summary: A completion task for Google Model Garden Meta serverless endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's serverless model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks. See the Meta model documentation for instructions on how to construct the URL.
 method_request: 'PUT _inference/completion/google_model_garden_meta_completion'
 # type: "request"
 value: |-
@@ -9,6 +9,6 @@ value: |-
       "provider": "meta",
       "model_id": "meta/llama-3.3-70b-instruct-maas",
       "service_account_json": "service-account-json",
-      "url": "https://url/openapi/chat/completions"
+      "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/openapi/chat/completions"
     }
   }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml
index 8d38a23ca5..0e3241b4ae 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml
@@ -1,5 +1,5 @@
-summary: A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided
-description: Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.
+summary: A chat_completion task for Google Model Garden Meta serverless endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's serverless model hosted on Google Model Garden with single streaming URL provided. See the Meta model documentation for instructions on how to construct the URL.
 method_request: 'PUT _inference/chat_completion/google_model_garden_meta_chat_completion'
 # type: "request"
 value: |-
@@ -9,6 +9,6 @@ value: |-
       "provider": "meta",
       "model_id": "meta/llama-3.3-70b-instruct-maas",
       "service_account_json": "service-account-json",
-      "streaming_url": "https://url/openapi/chat/completions"
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/openapi/chat/completions"
     }
   }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml
index e503c99756..c7a68ce82a 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml
@@ -1,13 +1,13 @@
-summary: A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks
-description: Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.
-method_request: 'PUT _inference/completion/google_model_garden_hugging_face_completion'
+summary: A completion task for Google Model Garden Meta dedicated endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_meta_completion'
 # type: "request"
 value: |-
   {
     "service": "googlevertexai",
     "service_settings": {
-      "provider": "hugging_face",
+      "provider": "meta",
       "service_account_json": "service-account-json",
-      "url": "https://url/chat/completions"
+      "url": "https://%ENDPOINT_ID%.%LOCATION_ID%-fasttryout.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
     }
   }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml
index 59c9c14010..35810d8992 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml
@@ -1,13 +1,13 @@
-summary: A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided
-description: Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided.
-method_request: 'PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion'
+summary: A chat_completion task for Google Model Garden Meta dedicated endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_meta_chat_completion'
 # type: "request"
 value: |-
   {
     "service": "googlevertexai",
     "service_settings": {
-      "provider": "hugging_face",
+      "provider": "meta",
       "service_account_json": "service-account-json",
-      "streaming_url": "https://url/chat/completions"
+      "streaming_url": "https://%ENDPOINT_ID%.%LOCATION_ID%-fasttryout.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
     }
   }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml
index 17c69126b5..682fa174a7 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml
@@ -1,15 +1,13 @@
-summary: A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks
-description: Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.
-method_request: 'PUT _inference/completion/google_model_garden_mistral_completion'
+summary: A completion task for Google Model Garden Meta shared endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_meta_completion'
 # type: "request"
 value: |-
   {
     "service": "googlevertexai",
     "service_settings": {
-      "provider": "mistral",
-      "model_id": "mistral-small-2503",
+      "provider": "meta",
       "service_account_json": "service-account-json",
-      "url": "https://url:rawPredict",
-      "streaming_url": "https://url:streamRawPredict"
+      "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
    }
   }