From 960a4872cd854d3fd060dc6a2739190bd7f11553 Mon Sep 17 00:00:00 2001 From: lcawl Date: Wed, 19 Mar 2025 00:09:31 -0700 Subject: [PATCH 1/2] Add inference.put_googleaistudio.json --- .../inference.put_googleaistudio.json | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 specification/_json_spec/inference.put_googleaistudio.json diff --git a/specification/_json_spec/inference.put_googleaistudio.json b/specification/_json_spec/inference.put_googleaistudio.json new file mode 100644 index 0000000000..4574626b61 --- /dev/null +++ b/specification/_json_spec/inference.put_googleaistudio.json @@ -0,0 +1,35 @@ +{ + "inference.put_googleaistudio": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-google-ai-studio.html", + "description": "Configure a Google AI Studio inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{googleaistudio_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "googleaistudio_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} From 60a83b83b667b0f4c85c69ee50961881a8bb4273 Mon Sep 17 00:00:00 2001 From: lcawl Date: Fri, 21 Mar 2025 20:02:23 -0700 Subject: [PATCH 2/2] Add Google AI Studio details --- output/openapi/elasticsearch-openapi.json | 114 +++++++++ .../elasticsearch-serverless-openapi.json | 114 +++++++++ output/schema/schema-serverless.json | 235 ++++++++++++++++++ output/schema/schema.json | 235 ++++++++++++++++++ output/typescript/types.ts | 22 ++ specification/_doc_ids/table.csv | 2 + .../PutGoogleAiStudioRequest.ts | 102 ++++++++ .../PutGoogleAiStudioResponse.ts | 24 
++ .../PutGoogleAiStudioRequestExample1.yaml | 12 + 9 files changed, 860 insertions(+) create mode 100644 specification/inference/put_googleaistudio/PutGoogleAiStudioRequest.ts create mode 100644 specification/inference/put_googleaistudio/PutGoogleAiStudioResponse.ts create mode 100644 specification/inference/put_googleaistudio/examples/request/PutGoogleAiStudioRequestExample1.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 65f6661a0f..03694b1857 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17830,6 +17830,84 @@ "x-state": "Added in 8.12.0" } }, + "/_inference/{task_type}/{googleaistudio_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create a Google AI Studio inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `googleaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-googleaistudio", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_googleaistudio:GoogleAiStudioTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "googleaistudio_inference_id", + "description": "The unique identifier of the inference 
endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_googleaistudio:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_googleaistudio:GoogleAiStudioServiceSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutGoogleAiStudioRequestExample1": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/google_ai_studio_completion` to create an inference endpoint to perform a `completion` task type.", + "value": "{\n \"service\": \"googleaistudio\",\n \"service_settings\": {\n \"api_key\": \"api-key\",\n \"model_id\": \"model-id\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.15.0" + } + }, "/_inference/{task_type}/{googlevertexai_inference_id}": { "put": { "tags": [ @@ -77288,6 +77366,42 @@ } } }, + "inference.put_googleaistudio:GoogleAiStudioTaskType": { + "type": "string", + "enum": [ + "completion", + "text_embedding" + ] + }, + "inference.put_googleaistudio:ServiceType": { + "type": "string", + "enum": [ + "googleaistudio" + ] + }, + "inference.put_googleaistudio:GoogleAiStudioServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "description": "A valid API key of your Google Gemini account.", + "type": "string" + }, + "model_id": { + "externalDocs": { + "url": "https://ai.google.dev/gemini-api/docs/models" + }, + "description": "The name of the model to use for the inference 
task.\nRefer to the Google documentation for the list of supported models.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + } + }, + "required": [ + "api_key", + "model_id" + ] + }, "inference.put_googlevertexai:GoogleVertexAITaskType": { "type": "string", "enum": [ diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 71b7be42fe..635ca82d14 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9652,6 +9652,84 @@ "x-state": "Added in 8.12.0" } }, + "/_inference/{task_type}/{googleaistudio_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create a Google AI Studio inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `googleaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-googleaistudio", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_googleaistudio:GoogleAiStudioTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "googleaistudio_inference_id", + "description": "The unique identifier of the inference endpoint.", 
+ "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_googleaistudio:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_googleaistudio:GoogleAiStudioServiceSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutGoogleAiStudioRequestExample1": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/google_ai_studio_completion` to create an inference endpoint to perform a `completion` task type.", + "value": "{\n \"service\": \"googleaistudio\",\n \"service_settings\": {\n \"api_key\": \"api-key\",\n \"model_id\": \"model-id\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.15.0" + } + }, "/_inference/{task_type}/{googlevertexai_inference_id}": { "put": { "tags": [ @@ -48480,6 +48558,42 @@ } } }, + "inference.put_googleaistudio:GoogleAiStudioTaskType": { + "type": "string", + "enum": [ + "completion", + "text_embedding" + ] + }, + "inference.put_googleaistudio:ServiceType": { + "type": "string", + "enum": [ + "googleaistudio" + ] + }, + "inference.put_googleaistudio:GoogleAiStudioServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "description": "A valid API key of your Google Gemini account.", + "type": "string" + }, + "model_id": { + "externalDocs": { + "url": "https://ai.google.dev/gemini-api/docs/models" + }, + "description": "The name of the model to use for the inference 
task.\nRefer to the Google documentation for the list of supported models.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + } + }, + "required": [ + "api_key", + "model_id" + ] + }, "inference.put_googlevertexai:GoogleVertexAITaskType": { "type": "string", "enum": [ diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index d5316fa316..cf60888bef 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4636,6 +4636,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.15.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create a Google AI Studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googleaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-googleaistudio", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-google-ai-studio.html", + "name": "inference.put_googleaistudio", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_googleaistudio" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": 
"inference.put_googleaistudio" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{googleaistudio_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -27246,6 +27291,119 @@ }, "specLocation": "inference/put_eis/PutEisResponse.ts#L22-L24" }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `googleaistudio`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_googleaistudio" + } + } + }, + { + "description": "Settings used to install the inference model. 
These settings are specific to the `googleaistudio` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "GoogleAiStudioServiceSettings", + "namespace": "inference.put_googleaistudio" + } + } + } + ] + }, + "description": "Create a Google AI Studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googleaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutGoogleAiStudioRequestExample1": { + "description": "Run `PUT _inference/completion/google_ai_studio_completion` to create an inference endpoint to perform a `completion` task type.", + "summary": "A completion task", + "value": "{\n \"service\": \"googleaistudio\",\n \"service_settings\": {\n \"api_key\": \"api-key\",\n \"model_id\": \"model-id\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_googleaistudio" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "GoogleAiStudioTaskType", + "namespace": "inference.put_googleaistudio" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "googleaistudio_inference_id", + "required": true, + 
"type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L27-L75" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_googleaistudio" + }, + "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioResponse.ts#L22-L24" + }, { "attachedBehaviors": [ "CommonQueryParameters" @@ -101133,6 +101291,35 @@ }, "specLocation": "inference/put_eis/PutEisRequest.ts#L68-L70" }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "GoogleAiStudioTaskType", + "namespace": "inference.put_googleaistudio" + }, + "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L77-L80" + }, + { + "kind": "enum", + "members": [ + { + "name": "googleaistudio" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_googleaistudio" + }, + "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L82-L84" + }, { "kind": "enum", "members": [ @@ -121784,6 +121971,54 @@ ], "specLocation": "inference/_types/Services.ts#L95-L100" }, + { + "kind": "interface", + "name": { + "name": "GoogleAiStudioServiceSettings", + "namespace": "inference.put_googleaistudio" + }, + "properties": [ + { + "description": "A valid API key of your Google Gemini account.", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "extDocId": "googleaistudio-models", + "extDocUrl": 
"https://ai.google.dev/gemini-api/docs/models", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Google AI Studio.\nBy default, the `googleaistudio` service sets the number of requests allowed per minute to 360.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L86-L102" + }, { "kind": "interface", "name": { diff --git a/output/schema/schema.json b/output/schema/schema.json index eb3ff11fb1..200c2ac618 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9348,6 +9348,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.15.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create a Google AI Studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googleaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-googleaistudio", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-google-ai-studio.html", + 
"name": "inference.put_googleaistudio", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_googleaistudio" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_googleaistudio" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{googleaistudio_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -150751,6 +150796,196 @@ }, "specLocation": "inference/put_eis/PutEisRequest.ts#L68-L70" }, + { + "kind": "interface", + "name": { + "name": "GoogleAiStudioServiceSettings", + "namespace": "inference.put_googleaistudio" + }, + "properties": [ + { + "description": "A valid API key of your Google Gemini account.", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "extDocId": "googleaistudio-models", + "extDocUrl": "https://ai.google.dev/gemini-api/docs/models", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Google AI Studio.\nBy default, the `googleaistudio` service sets the number of requests allowed per minute to 360.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L86-L102" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + 
"name": "text_embedding" + } + ], + "name": { + "name": "GoogleAiStudioTaskType", + "namespace": "inference.put_googleaistudio" + }, + "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L77-L80" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `googleaistudio`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_googleaistudio" + } + } + }, + { + "description": "Settings used to install the inference model. 
These settings are specific to the `googleaistudio` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "GoogleAiStudioServiceSettings", + "namespace": "inference.put_googleaistudio" + } + } + } + ] + }, + "description": "Create a Google AI Studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googleaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutGoogleAiStudioRequestExample1": { + "description": "Run `PUT _inference/completion/google_ai_studio_completion` to create an inference endpoint to perform a `completion` task type.", + "summary": "A completion task", + "value": "{\n \"service\": \"googleaistudio\",\n \"service_settings\": {\n \"api_key\": \"api-key\",\n \"model_id\": \"model-id\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_googleaistudio" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "GoogleAiStudioTaskType", + "namespace": "inference.put_googleaistudio" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "googleaistudio_inference_id", + "required": true, + "type": { + "kind": 
"instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L27-L75" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_googleaistudio" + }, + "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "googleaistudio" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_googleaistudio" + }, + "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L82-L84" + }, { "kind": "interface", "name": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index bbcb023205..846982c1cd 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13272,6 +13272,28 @@ export type InferencePutEisResponse = InferenceInferenceEndpointInfo export type InferencePutEisServiceType = 'elastic' +export interface InferencePutGoogleaistudioGoogleAiStudioServiceSettings { + api_key: string + model_id: string + rate_limit?: InferenceRateLimitSetting +} + +export type InferencePutGoogleaistudioGoogleAiStudioTaskType = 'completion' | 'text_embedding' + +export interface InferencePutGoogleaistudioRequest extends RequestBase { + task_type: InferencePutGoogleaistudioGoogleAiStudioTaskType + googleaistudio_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutGoogleaistudioServiceType + service_settings: InferencePutGoogleaistudioGoogleAiStudioServiceSettings + } +} + +export type InferencePutGoogleaistudioResponse = InferenceInferenceEndpointInfo + +export type InferencePutGoogleaistudioServiceType = 'googleaistudio' + export interface 
InferencePutGooglevertexaiGoogleVertexAIServiceSettings { location: string model_id: string diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index b8bb5c5a17..b11d2f41d2 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -239,6 +239,7 @@ get-transform,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operat get-trial-status,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-license-get-trial-status googlevertexai-locations,https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations googlevertexai-models,https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api +googleaistudio-models,https://ai.google.dev/gemini-api/docs/models graph,https://www.elastic.co/guide/en/kibana/current/xpack-graph.html graph-explore-api,https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-graph grok,https://www.elastic.co/guide/en/elasticsearch/reference/current/grok.html @@ -325,6 +326,7 @@ inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/cu inference-api-put-huggingface,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-hugging-face.html inference-api-put-jinaai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-jinaai.html inference-api-put-googlevertexai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-google-vertex-ai.html +inference-api-put-googleaistudio,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-google-ai-studio.html inference-api-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-openai.html inference-api-put-voyageai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-voyageai inference-api-put-watsonx,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-watsonx diff --git 
a/specification/inference/put_googleaistudio/PutGoogleAiStudioRequest.ts b/specification/inference/put_googleaistudio/PutGoogleAiStudioRequest.ts new file mode 100644 index 0000000000..aeac77c0b7 --- /dev/null +++ b/specification/inference/put_googleaistudio/PutGoogleAiStudioRequest.ts @@ -0,0 +1,102 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + InferenceChunkingSettings, + RateLimitSetting +} from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' + +/** + * Create a Google AI Studio inference endpoint. + * + * Create an inference endpoint to perform an inference task with the `googleaistudio` service. + * + * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + * After creating the endpoint, wait for the model deployment to complete before using it. + * To verify the deployment status, use the get trained model statistics API. + * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. 
+ * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. + * @rest_spec_name inference.put_googleaistudio + * @availability stack since=8.15.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-googleaistudio + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{googleaistudio_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. + */ + task_type: GoogleAiStudioTaskType + /** + * The unique identifier of the inference endpoint. + */ + googleaistudio_inference_id: Id + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `googleaistudio`. + */ + service: ServiceType + /** + * Settings used to install the inference model. These settings are specific to the `googleaistudio` service. + */ + service_settings: GoogleAiStudioServiceSettings + } +} + +export enum GoogleAiStudioTaskType { + completion, + text_embedding +} + +export enum ServiceType { + googleaistudio +} + +export class GoogleAiStudioServiceSettings { + /** + * A valid API key of your Google Gemini account. + */ + api_key: string + /** + * The name of the model to use for the inference task. + * Refer to the Google documentation for the list of supported models. + * @ext_doc_id googleaistudio-models + */ + model_id: string + /** + * This setting helps to minimize the number of rate limit errors returned from Google AI Studio. + * By default, the `googleaistudio` service sets the number of requests allowed per minute to 360. 
+ */ + rate_limit?: RateLimitSetting +} diff --git a/specification/inference/put_googleaistudio/PutGoogleAiStudioResponse.ts b/specification/inference/put_googleaistudio/PutGoogleAiStudioResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_googleaistudio/PutGoogleAiStudioResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_googleaistudio/examples/request/PutGoogleAiStudioRequestExample1.yaml b/specification/inference/put_googleaistudio/examples/request/PutGoogleAiStudioRequestExample1.yaml new file mode 100644 index 0000000000..e8304e0e82 --- /dev/null +++ b/specification/inference/put_googleaistudio/examples/request/PutGoogleAiStudioRequestExample1.yaml @@ -0,0 +1,12 @@ +summary: A completion task +description: Run `PUT _inference/completion/google_ai_studio_completion` to create an inference endpoint to perform a `completion` task type. 
+# method_request: "PUT _inference/completion/google_ai_studio_completion" +# type: "request" +value: |- + { + "service": "googleaistudio", + "service_settings": { + "api_key": "api-key", + "model_id": "model-id" + } + }