From 90de26ac3fb7972bec4d03395a46bbb4fad6341a Mon Sep 17 00:00:00 2001 From: Jan Kazlouski Date: Thu, 23 Oct 2025 14:12:25 +0000 Subject: [PATCH 1/4] Add support for multiple Google Model Garden providers for completion and chat_completion tasks --- output/openapi/elasticsearch-openapi.json | 16 +++- .../elasticsearch-serverless-openapi.json | 16 +++- output/schema/schema.json | 90 ++++++++++++------- output/typescript/types.ts | 2 +- specification/inference/_types/CommonTypes.ts | 6 +- .../PutGoogleVertexAiRequestExample5.yaml | 14 +++ .../PutGoogleVertexAiRequestExample6.yaml | 14 +++ 7 files changed, 121 insertions(+), 37 deletions(-) create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 2d2b1bf4d2..aa5ce8f31e 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -22760,6 +22760,16 @@ "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample5": { + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample6": { + "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" } } } @@ -103015,7 +103025,11 @@ "type": "string", "enum": [ "google", - "anthropic" + "anthropic", + "meta", + "hugging_face", + "mistral", + "ai21" ] }, "inference._types.GoogleVertexAITaskSettings": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 7ce70fcc14..3821fbb172 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ 
b/output/openapi/elasticsearch-serverless-openapi.json @@ -13742,6 +13742,16 @@ "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample5": { + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample6": { + "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" } } } @@ -67095,7 +67105,11 @@ "type": "string", "enum": [ "google", - "anthropic" + "anthropic", + "meta", + "hugging_face", + "mistral", + "ai21" ] }, "inference._types.GoogleVertexAITaskSettings": { diff --git a/output/schema/schema.json b/output/schema/schema.json index 3081b4bcce..fac8f24cbd 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -173219,13 +173219,25 @@ }, { "name": "anthropic" + }, + { + "name": "meta" + }, + { + "name": "hugging_face" + }, + { + "name": "mistral" + }, + { + "name": "ai21" } ], "name": { "name": "GoogleModelGardenProvider", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1498-L1501" + "specLocation": "inference/_types/CommonTypes.ts#L1498-L1505" }, { "kind": "interface", @@ -173362,7 +173374,7 @@ "name": "GoogleVertexAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1542-L1544" + "specLocation": "inference/_types/CommonTypes.ts#L1546-L1548" }, { "kind": "interface", @@ -173424,7 +173436,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1503-L1526" + "specLocation": "inference/_types/CommonTypes.ts#L1507-L1530" }, { "kind": "enum", @@ -173446,7 +173458,7 @@ "name": "GoogleVertexAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1535-L1540" + "specLocation": "inference/_types/CommonTypes.ts#L1539-L1544" }, { "kind": "interface", @@ -173508,7 +173520,7 @@ } } ], - "specLocation": 
"inference/_types/CommonTypes.ts#L1546-L1578" + "specLocation": "inference/_types/CommonTypes.ts#L1550-L1582" }, { "kind": "enum", @@ -173521,7 +173533,7 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1599-L1601" + "specLocation": "inference/_types/CommonTypes.ts#L1603-L1605" }, { "kind": "interface", @@ -173555,7 +173567,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1580-L1590" + "specLocation": "inference/_types/CommonTypes.ts#L1584-L1594" }, { "kind": "enum", @@ -173577,7 +173589,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1592-L1597" + "specLocation": "inference/_types/CommonTypes.ts#L1596-L1601" }, { "kind": "interface", @@ -174809,7 +174821,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1603-L1632" + "specLocation": "inference/_types/CommonTypes.ts#L1607-L1636" }, { "kind": "enum", @@ -174822,7 +174834,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1662-L1664" + "specLocation": "inference/_types/CommonTypes.ts#L1666-L1668" }, { "kind": "enum", @@ -174841,7 +174853,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1666-L1670" + "specLocation": "inference/_types/CommonTypes.ts#L1670-L1674" }, { "kind": "interface", @@ -174887,7 +174899,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1634-L1655" + "specLocation": "inference/_types/CommonTypes.ts#L1638-L1659" }, { "kind": "enum", @@ -174903,7 +174915,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1657-L1660" + "specLocation": "inference/_types/CommonTypes.ts#L1661-L1664" }, { "kind": "enum", @@ -174925,7 +174937,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1672-L1677" + "specLocation": "inference/_types/CommonTypes.ts#L1676-L1681" }, { "kind": "interface", @@ -174997,7 +175009,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1679-L1709" + "specLocation": "inference/_types/CommonTypes.ts#L1683-L1713" }, { "kind": "enum", @@ -175010,7 +175022,7 @@ "name": "LlamaServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1717-L1719" + "specLocation": "inference/_types/CommonTypes.ts#L1721-L1723" }, { "kind": "enum", @@ -175029,7 +175041,7 @@ "name": "LlamaSimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1721-L1725" + "specLocation": "inference/_types/CommonTypes.ts#L1725-L1729" }, { "kind": "enum", @@ -175048,7 +175060,7 @@ "name": "LlamaTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1711-L1715" + "specLocation": "inference/_types/CommonTypes.ts#L1715-L1719" }, { "kind": "interface", @@ -175206,7 +175218,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1727-L1754" + "specLocation": "inference/_types/CommonTypes.ts#L1731-L1758" }, { "kind": "enum", @@ -175219,7 +175231,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1762-L1764" + "specLocation": "inference/_types/CommonTypes.ts#L1766-L1768" }, { "kind": "enum", @@ -175238,7 +175250,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, 
- "specLocation": "inference/_types/CommonTypes.ts#L1756-L1760" + "specLocation": "inference/_types/CommonTypes.ts#L1760-L1764" }, { "kind": "interface", @@ -175325,7 +175337,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1766-L1808" + "specLocation": "inference/_types/CommonTypes.ts#L1770-L1812" }, { "kind": "enum", @@ -175338,7 +175350,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1835-L1837" + "specLocation": "inference/_types/CommonTypes.ts#L1839-L1841" }, { "kind": "interface", @@ -175368,7 +175380,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1810-L1827" + "specLocation": "inference/_types/CommonTypes.ts#L1814-L1831" }, { "kind": "enum", @@ -175387,7 +175399,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1829-L1833" + "specLocation": "inference/_types/CommonTypes.ts#L1833-L1837" }, { "kind": "interface", @@ -176233,7 +176245,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1528-L1533" + "specLocation": "inference/_types/CommonTypes.ts#L1532-L1537" }, { "kind": "interface", @@ -176379,7 +176391,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1839-L1870" + "specLocation": "inference/_types/CommonTypes.ts#L1843-L1874" }, { "kind": "enum", @@ -176392,7 +176404,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1903-L1905" + "specLocation": "inference/_types/CommonTypes.ts#L1907-L1909" }, { "kind": "interface", @@ -176452,7 +176464,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1872-L1896" + "specLocation": "inference/_types/CommonTypes.ts#L1876-L1900" }, { "kind": "enum", @@ -176468,7 +176480,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1898-L1901" + "specLocation": "inference/_types/CommonTypes.ts#L1902-L1905" }, { "kind": "interface", @@ -176556,7 +176568,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1907-L1945" + "specLocation": "inference/_types/CommonTypes.ts#L1911-L1949" }, { "kind": "enum", @@ -176569,7 +176581,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1953-L1955" + "specLocation": "inference/_types/CommonTypes.ts#L1957-L1959" }, { "kind": "enum", @@ -176588,7 +176600,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1947-L1951" + "specLocation": "inference/_types/CommonTypes.ts#L1951-L1955" }, { "kind": "request", @@ -180361,6 +180373,18 @@ "method_request": "PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion", "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample5": { + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", + "method_request": "PUT 
_inference/completion/google_model_garden_meta_completion", + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample6": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", + "method_request": "PUT _inference/chat_completion/google_model_garden_meta_chat_completion", + "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" } }, "inherits": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index f04b4076bc..2a93c1c4e7 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -14248,7 +14248,7 @@ export interface InferenceGoogleAiStudioServiceSettings { export type InferenceGoogleAiStudioTaskType = 'completion' | 'text_embedding' -export type InferenceGoogleModelGardenProvider = 'google' | 'anthropic' +export type InferenceGoogleModelGardenProvider = 'google' | 'anthropic' | 'meta' | 'hugging_face' | 'mistral' | 'ai21' export interface InferenceGoogleVertexAIServiceSettings { provider?: InferenceGoogleModelGardenProvider diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 10f45fd4d7..91cb66d192 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -1497,7 +1497,11 @@ export class GoogleVertexAIServiceSettings { export enum GoogleModelGardenProvider { google, - anthropic + anthropic, + meta, + hugging_face, + mistral, + ai21 } export class GoogleVertexAITaskSettings { diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml new file mode 100644 index 0000000000..750fc0eaf7 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml @@ -0,0 +1,14 @@ +summary: A completion task for Google Model Garden Meta endpoint with single URL provided +description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided. 
+method_request: 'PUT _inference/completion/google_model_garden_meta_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "meta", + "model_id": "meta/llama-3.3-70b-instruct-maas", + "service_account_json": "service-account-json", + "url": "https://url/openapi/chat/completions" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml new file mode 100644 index 0000000000..8d38a23ca5 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml @@ -0,0 +1,14 @@ +summary: A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided +description: Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided. +method_request: 'PUT _inference/chat_completion/google_model_garden_meta_chat_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "meta", + "model_id": "meta/llama-3.3-70b-instruct-maas", + "service_account_json": "service-account-json", + "streaming_url": "https://url/openapi/chat/completions" + } + } From 8c9c4ef49b54bfc4226ae38309b5f609eee92b69 Mon Sep 17 00:00:00 2001 From: Jan Kazlouski Date: Fri, 24 Oct 2025 11:52:11 +0000 Subject: [PATCH 2/4] Add chat_completion and completion task examples for various Google Model Garden providers with single and separate streaming URLs --- output/openapi/elasticsearch-openapi.json | 44 +++++++++++++--- .../elasticsearch-serverless-openapi.json | 44 +++++++++++++--- output/schema/schema.json | 50 ++++++++++++++++--- .../PutGoogleVertexAiRequestExample10.yaml | 14 ++++++ .../PutGoogleVertexAiRequestExample11.yaml | 14 ++++++ .../PutGoogleVertexAiRequestExample12.yaml | 13 +++++ .../PutGoogleVertexAiRequestExample3.yaml | 4 +- .../PutGoogleVertexAiRequestExample4.yaml | 5 +- .../PutGoogleVertexAiRequestExample5.yaml | 4 +- .../PutGoogleVertexAiRequestExample7.yaml | 13 +++++ .../PutGoogleVertexAiRequestExample8.yaml | 13 +++++ .../PutGoogleVertexAiRequestExample9.yaml | 15 ++++++ 12 files changed, 205 insertions(+), 28 deletions(-) create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 174d8158e4..b038c6b679 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -22746,30 +22746,60 @@ "description": "Run `PUT _inference/text_embedding/google_vertex_ai_embeddings` to create an inference endpoint to perform a `text_embedding` 
task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" }, + "PutGoogleVertexAiRequestExample10": { + "summary": "A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample11": { + "summary": "A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample12": { + "summary": "A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, "PutGoogleVertexAiRequestExample2": { "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" }, "PutGoogleVertexAiRequestExample3": { - "summary": "A completion task for Google Model Garden Anthropic endpoint", - "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "summary": "A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": 
\"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample4": { - "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", - "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample5": { - "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", - "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample6": { "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample7": { + "summary": "A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden 
with single URL provided for both streaming and non-streaming tasks.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample8": { + "summary": "A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample9": { + "summary": "A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" } } } diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 4126e46f68..4924a1db9b 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -13728,30 +13728,60 @@ "description": "Run `PUT _inference/text_embedding/google_vertex_ai_embeddings` to create an inference endpoint to perform a `text_embedding` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" }, + "PutGoogleVertexAiRequestExample10": { + "summary": "A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample11": { + "summary": "A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": 
{\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample12": { + "summary": "A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, "PutGoogleVertexAiRequestExample2": { "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" }, "PutGoogleVertexAiRequestExample3": { - "summary": "A completion task for Google Model Garden Anthropic endpoint", - "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "summary": "A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample4": { - "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", - "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 
128\n }\n}" }, "PutGoogleVertexAiRequestExample5": { - "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", - "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample6": { "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample7": { + "summary": "A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample8": { + "summary": "A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample9": { + "summary": "A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "value": "{\n \"service\": 
\"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" } } } diff --git a/output/schema/schema.json b/output/schema/schema.json index 285d8bd8a8..9b86d2519b 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -180441,6 +180441,24 @@ "summary": "A text embedding task", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" }, + "PutGoogleVertexAiRequestExample10": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden with single streaming URL provided.", + "method_request": "PUT _inference/chat_completion/google_model_garden_mistral_chat_completion", + "summary": "A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample11": { + "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "method_request": "PUT _inference/completion/google_model_garden_ai21_completion", + "summary": "A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample12": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided.", + "method_request": "PUT _inference/chat_completion/google_model_garden_ai21_chat_completion", + "summary": "A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, "PutGoogleVertexAiRequestExample2": { "alternatives": [ { @@ -180470,21 +180488,21 @@ "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" }, "PutGoogleVertexAiRequestExample3": { - "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "description": "Run `PUT 
_inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", "method_request": "PUT _inference/completion/google_model_garden_anthropic_completion", - "summary": "A completion task for Google Model Garden Anthropic endpoint", + "summary": "A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample4": { - "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided.", "method_request": "PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion", - "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample5": { - "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided.", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", "method_request": "PUT _inference/completion/google_model_garden_meta_completion", - "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided", + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample6": { @@ -180492,6 +180510,24 @@ "method_request": "PUT 
_inference/chat_completion/google_model_garden_meta_chat_completion", "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample7": { + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", + "method_request": "PUT _inference/completion/google_model_garden_hugging_face_completion", + "summary": "A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample8": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided.", + "method_request": "PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion", + "summary": "A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample9": { + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "method_request": "PUT _inference/completion/google_model_garden_mistral_completion", + "summary": "A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" } }, "inherits": { diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml new file mode 100644 index 0000000000..ad2ed0183a --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml @@ -0,0 +1,14 @@ +summary: A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided +description: Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on 
Google Model Garden with single streaming URL provided. +method_request: 'PUT _inference/chat_completion/google_model_garden_mistral_chat_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "mistral", + "model_id": "mistral-small-2503", + "service_account_json": "service-account-json", + "streaming_url": "https://url:streamRawPredict" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml new file mode 100644 index 0000000000..00a076c727 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml @@ -0,0 +1,14 @@ +summary: A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks +description: Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. +method_request: 'PUT _inference/completion/google_model_garden_ai21_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "ai21", + "service_account_json": "service-account-json", + "url": "https://url:rawPredict", + "streaming_url": "https://url:streamRawPredict" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml new file mode 100644 index 0000000000..ceadc43079 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml @@ -0,0 +1,13 @@ +summary: A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided +description: Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided. +method_request: 'PUT _inference/chat_completion/google_model_garden_ai21_chat_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "ai21", + "service_account_json": "service-account-json", + "streaming_url": "https://url:streamRawPredict" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml index 5cb79753dc..224eec318a 100644 --- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml @@ -1,5 +1,5 @@ -summary: A completion task for Google Model Garden Anthropic endpoint -description: Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden. 
+summary: A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks +description: Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. method_request: 'PUT _inference/completion/google_model_garden_anthropic_completion' # type: "request" value: |- diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml index 52b7ececd8..a8ae043de1 100644 --- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml @@ -1,5 +1,5 @@ -summary: A chat_completion task for Google Model Garden Anthropic endpoint -description: Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden. +summary: A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided +description: Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided. method_request: 'PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion' # type: "request" value: |- @@ -8,7 +8,6 @@ value: |- "service_settings": { "provider": "anthropic", "service_account_json": "service-account-json", - "url": "https://url:rawPredict", "streaming_url": "https://streaming_url:streamRawPredict" }, "task_settings": { diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml index 750fc0eaf7..19f4ce4de5 100644 --- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml @@ -1,5 +1,5 @@ -summary: A completion task for Google Model Garden Meta endpoint with single URL provided -description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided. +summary: A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks +description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks. 
method_request: 'PUT _inference/completion/google_model_garden_meta_completion' # type: "request" value: |- diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml new file mode 100644 index 0000000000..e503c99756 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml @@ -0,0 +1,13 @@ +summary: A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks +description: Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks. +method_request: 'PUT _inference/completion/google_model_garden_hugging_face_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "hugging_face", + "service_account_json": "service-account-json", + "url": "https://url/chat/completions" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml new file mode 100644 index 0000000000..59c9c14010 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml @@ -0,0 +1,13 @@ +summary: A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided +description: Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided. +method_request: 'PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "hugging_face", + "service_account_json": "service-account-json", + "streaming_url": "https://url/chat/completions" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml new file mode 100644 index 0000000000..17c69126b5 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml @@ -0,0 +1,15 @@ +summary: A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks +description: Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. 
+method_request: 'PUT _inference/completion/google_model_garden_mistral_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "mistral", + "model_id": "mistral-small-2503", + "service_account_json": "service-account-json", + "url": "https://url:rawPredict", + "streaming_url": "https://url:streamRawPredict" + } + } From 918321e02be6e220f7d9908d4055a56630194d8e Mon Sep 17 00:00:00 2001 From: Jan Kazlouski Date: Fri, 24 Oct 2025 12:08:27 +0000 Subject: [PATCH 3/4] Update CommonTypes.ts to clarify URL requirements for various providers --- output/openapi/elasticsearch-openapi.json | 4 +- .../elasticsearch-serverless-openapi.json | 4 +- output/schema/schema.json | 72 +++++++++---------- specification/inference/_types/CommonTypes.ts | 2 + 4 files changed, 42 insertions(+), 40 deletions(-) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index b038c6b679..8cbb2651b7 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -103017,11 +103017,11 @@ ] }, "url": { - "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). 
Others support both operation types through a single URL (e.g., Meta, Hugging Face).", "type": "string" }, "streaming_url": { - "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", "type": "string" }, "location": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 4924a1db9b..32d051c61b 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -67097,11 +67097,11 @@ ] }, "url": { - "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). 
Others support both operation types through a single URL (e.g., Meta, Hugging Face).", "type": "string" }, "streaming_url": { - "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", "type": "string" }, "location": { diff --git a/output/schema/schema.json b/output/schema/schema.json index 9b86d2519b..49ba042117 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -173344,7 +173344,7 @@ "name": "GoogleModelGardenProvider", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1498-L1505" + "specLocation": "inference/_types/CommonTypes.ts#L1500-L1507" }, { "kind": "interface", @@ -173366,7 +173366,7 @@ } }, { - "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). 
Others support both operation types through a single URL (e.g., Meta, Hugging Face).", "name": "url", "required": false, "type": { @@ -173378,7 +173378,7 @@ } }, { - "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", "name": "streaming_url", "required": false, "type": { @@ -173468,7 +173468,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1433-L1496" + "specLocation": "inference/_types/CommonTypes.ts#L1433-L1498" }, { "kind": "enum", @@ -173481,7 +173481,7 @@ "name": "GoogleVertexAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1546-L1548" + "specLocation": "inference/_types/CommonTypes.ts#L1548-L1550" }, { "kind": "interface", @@ -173543,7 +173543,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1507-L1530" + "specLocation": "inference/_types/CommonTypes.ts#L1509-L1532" }, { "kind": "enum", @@ -173565,7 +173565,7 @@ "name": "GoogleVertexAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1539-L1544" + "specLocation": "inference/_types/CommonTypes.ts#L1541-L1546" }, { "kind": "interface", @@ -173627,7 +173627,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1550-L1582" + "specLocation": "inference/_types/CommonTypes.ts#L1552-L1584" }, { "kind": "enum", @@ -173640,7 +173640,7 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1603-L1605" + "specLocation": "inference/_types/CommonTypes.ts#L1605-L1607" }, { "kind": "interface", @@ -173674,7 +173674,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1584-L1594" + "specLocation": "inference/_types/CommonTypes.ts#L1586-L1596" }, { "kind": "enum", @@ -173696,7 +173696,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1596-L1601" + "specLocation": "inference/_types/CommonTypes.ts#L1598-L1603" }, { "kind": "interface", @@ -174928,7 +174928,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1607-L1636" + "specLocation": "inference/_types/CommonTypes.ts#L1609-L1638" }, { "kind": "enum", @@ -174941,7 +174941,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1666-L1668" + "specLocation": 
"inference/_types/CommonTypes.ts#L1668-L1670" }, { "kind": "enum", @@ -174960,7 +174960,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1670-L1674" + "specLocation": "inference/_types/CommonTypes.ts#L1672-L1676" }, { "kind": "interface", @@ -175006,7 +175006,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1638-L1659" + "specLocation": "inference/_types/CommonTypes.ts#L1640-L1661" }, { "kind": "enum", @@ -175022,7 +175022,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1661-L1664" + "specLocation": "inference/_types/CommonTypes.ts#L1663-L1666" }, { "kind": "enum", @@ -175044,7 +175044,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1676-L1681" + "specLocation": "inference/_types/CommonTypes.ts#L1678-L1683" }, { "kind": "interface", @@ -175116,7 +175116,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1683-L1713" + "specLocation": "inference/_types/CommonTypes.ts#L1685-L1715" }, { "kind": "enum", @@ -175129,7 +175129,7 @@ "name": "LlamaServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1721-L1723" + "specLocation": "inference/_types/CommonTypes.ts#L1723-L1725" }, { "kind": "enum", @@ -175148,7 +175148,7 @@ "name": "LlamaSimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1725-L1729" + "specLocation": "inference/_types/CommonTypes.ts#L1727-L1731" }, { "kind": "enum", @@ -175167,7 +175167,7 @@ "name": "LlamaTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1715-L1719" + "specLocation": "inference/_types/CommonTypes.ts#L1717-L1721" }, { "kind": "interface", @@ -175325,7 +175325,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1731-L1758" + "specLocation": "inference/_types/CommonTypes.ts#L1733-L1760" }, { "kind": "enum", @@ -175338,7 +175338,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1766-L1768" + "specLocation": "inference/_types/CommonTypes.ts#L1768-L1770" }, { "kind": "enum", @@ -175357,7 +175357,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1760-L1764" + "specLocation": "inference/_types/CommonTypes.ts#L1762-L1766" }, { "kind": "interface", @@ -175444,7 +175444,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1770-L1812" + "specLocation": "inference/_types/CommonTypes.ts#L1772-L1814" }, { "kind": "enum", @@ -175457,7 +175457,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1839-L1841" + "specLocation": "inference/_types/CommonTypes.ts#L1841-L1843" }, { "kind": "interface", @@ -175487,7 +175487,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1814-L1831" + "specLocation": "inference/_types/CommonTypes.ts#L1816-L1833" }, { "kind": "enum", @@ -175506,7 +175506,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1833-L1837" + "specLocation": "inference/_types/CommonTypes.ts#L1835-L1839" }, { "kind": "interface", @@ -176352,7 +176352,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1532-L1537" + "specLocation": 
"inference/_types/CommonTypes.ts#L1534-L1539" }, { "kind": "interface", @@ -176498,7 +176498,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1843-L1874" + "specLocation": "inference/_types/CommonTypes.ts#L1845-L1876" }, { "kind": "enum", @@ -176511,7 +176511,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1907-L1909" + "specLocation": "inference/_types/CommonTypes.ts#L1909-L1911" }, { "kind": "interface", @@ -176571,7 +176571,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1876-L1900" + "specLocation": "inference/_types/CommonTypes.ts#L1878-L1902" }, { "kind": "enum", @@ -176587,7 +176587,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1902-L1905" + "specLocation": "inference/_types/CommonTypes.ts#L1904-L1907" }, { "kind": "interface", @@ -176675,7 +176675,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1911-L1949" + "specLocation": "inference/_types/CommonTypes.ts#L1913-L1951" }, { "kind": "enum", @@ -176688,7 +176688,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1957-L1959" + "specLocation": "inference/_types/CommonTypes.ts#L1959-L1961" }, { "kind": "enum", @@ -176707,7 +176707,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1951-L1955" + "specLocation": "inference/_types/CommonTypes.ts#L1953-L1957" }, { "kind": "request", diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 91cb66d192..645cc3d275 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -1445,6 +1445,7 @@ export class GoogleVertexAIServiceSettings { * If `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`. * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`). * At least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage. + * Certain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face). */ url?: string /** @@ -1453,6 +1454,7 @@ export class GoogleVertexAIServiceSettings { * If `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests. * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`). * At least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage. + * Certain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face). 
*/ streaming_url?: string /** From 2c4bcb59c42b30f51d3a2d5dbd9071404838c249 Mon Sep 17 00:00:00 2001 From: Jan Kazlouski Date: Wed, 29 Oct 2025 12:22:26 +0000 Subject: [PATCH 4/4] Add examples for chat_completion and completion tasks using various Google Model Garden providers with updated URL formats --- output/openapi/elasticsearch-openapi.json | 114 +++++++--- .../elasticsearch-serverless-openapi.json | 114 +++++++--- output/schema/schema.json | 204 +++++++++++------- specification/inference/_types/CommonTypes.ts | 2 + .../PutGoogleVertexAiRequestExample10.yaml | 11 +- .../PutGoogleVertexAiRequestExample11.yaml | 11 +- .../PutGoogleVertexAiRequestExample12.yaml | 10 +- .../PutGoogleVertexAiRequestExample13.yaml | 13 ++ .../PutGoogleVertexAiRequestExample14.yaml | 13 ++ .../PutGoogleVertexAiRequestExample15.yaml | 15 ++ .../PutGoogleVertexAiRequestExample16.yaml | 14 ++ .../PutGoogleVertexAiRequestExample17.yaml | 13 ++ .../PutGoogleVertexAiRequestExample18.yaml | 13 ++ .../PutGoogleVertexAiRequestExample19.yaml | 13 ++ .../PutGoogleVertexAiRequestExample20.yaml | 13 ++ .../PutGoogleVertexAiRequestExample21.yaml | 14 ++ .../PutGoogleVertexAiRequestExample22.yaml | 13 ++ .../PutGoogleVertexAiRequestExample3.yaml | 8 +- .../PutGoogleVertexAiRequestExample4.yaml | 6 +- .../PutGoogleVertexAiRequestExample5.yaml | 6 +- .../PutGoogleVertexAiRequestExample6.yaml | 6 +- .../PutGoogleVertexAiRequestExample7.yaml | 10 +- .../PutGoogleVertexAiRequestExample8.yaml | 10 +- .../PutGoogleVertexAiRequestExample9.yaml | 12 +- 24 files changed, 475 insertions(+), 183 deletions(-) create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample13.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample14.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample15.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample16.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample17.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample18.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample19.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample20.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample21.yaml create mode 100644 specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample22.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 92eba80f75..f0fa323701 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -22777,59 +22777,109 @@ "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" }, "PutGoogleVertexAiRequestExample10": { - "summary": "A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to 
perform a `chat_completion` task using Mistral's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "summary": "A chat_completion task for Google Model Garden Meta shared endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample11": { - "summary": "A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "summary": "A completion task for Google Model Garden Hugging Face dedicated endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample12": { - "summary": "A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "summary": "A chat_completion task for Google Model Garden Hugging Face dedicated endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample13": { + "summary": "A completion task for Google Model Garden Hugging Face shared endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample14": { + "summary": "A chat_completion task for Google Model Garden Hugging Face shared endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single streaming URL provided. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample15": { + "summary": "A completion task for Google Model Garden Mistral serverless endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. See the Mistral model documentation for instructions on how to construct URLs.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:rawPredict\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample16": { + "summary": "A chat_completion task for Google Model Garden Mistral serverless endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's serverless model hosted on Google Model Garden with single streaming URL provided. See the Mistral model documentation for instructions on how to construct the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample17": { + "summary": "A completion task for Google Model Garden Mistral dedicated endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample18": { + "summary": "A chat_completion task for Google Model Garden Mistral dedicated endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample19": { + "summary": "A completion task for Google Model Garden Mistral shared endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample2": { "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" }, + "PutGoogleVertexAiRequestExample20": { + "summary": "A chat_completion task for Google Model Garden Mistral shared endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single streaming URL provided. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample21": { + "summary": "A completion task for Google Model Garden AI21 serverless endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden serverless endpoint with separate URLs for streaming and non-streaming tasks. See the AI21 model documentation for instructions on how to construct URLs.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:rawPredict\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample22": { + "summary": "A chat_completion task for Google Model Garden AI21 serverless endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden serverless endpoint with single streaming URL provided. See the AI21 model documentation for instructions on how to construct URLs.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, "PutGoogleVertexAiRequestExample3": { - "summary": "A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A completion task for Google Model Garden Anthropic serverless endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. 
See the Anthropic model documentation for instructions on how to construct URLs.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:rawPredict\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample4": { - "summary": "A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A chat_completion task for Google Model Garden Anthropic serverless endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's serverless model hosted on Google Model Garden with single streaming URL provided. See the Anthropic model documentation for instructions on how to construct the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample5": { - "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + "summary": "A completion task for Google Model Garden Meta serverless endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's serverless model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks. 
See the Meta model documentation for instructions on how to construct the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/openapi/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample6": { - "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" + "summary": "A chat_completion task for Google Model Garden Meta serverless endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's serverless model hosted on Google Model Garden with single streaming URL provided. See the Meta model documentation for instructions on how to construct the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/openapi/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample7": { - "summary": "A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/chat/completions\"\n }\n}" + "summary": "A completion task for Google Model Garden Meta dedicated endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-fasttryout.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample8": { - "summary": "A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/chat/completions\"\n }\n}" + "summary": "A chat_completion task for Google Model Garden Meta dedicated endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-fasttryout.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample9": { - "summary": "A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "summary": "A completion task for Google Model Garden Meta shared endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" } } } @@ -103077,11 +103127,11 @@ ] }, "url": { - "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).\nInformation on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint’s `Sample request` page. The request examples also illustrate the proper formatting for the `url`.", "type": "string" }, "streaming_url": { - "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). 
Others support both operation types through a single URL (e.g., Meta, Hugging Face).\nInformation on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint’s `Sample request` page. The request examples also illustrate the proper formatting for the `streaming_url`.", "type": "string" }, "location": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index f8a68eeab1..7c98738466 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -13729,59 +13729,109 @@ "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" }, "PutGoogleVertexAiRequestExample10": { - "summary": "A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "summary": "A chat_completion task for Google Model Garden Meta shared endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample11": { - "summary": "A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "summary": "A completion task for Google Model Garden Hugging Face dedicated endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample12": { - "summary": "A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "summary": "A chat_completion task for Google Model Garden Hugging Face dedicated endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample13": { + "summary": "A completion task for Google Model Garden Hugging Face shared endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample14": { + "summary": "A chat_completion task for Google Model Garden Hugging Face shared endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single streaming URL provided. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample15": { + "summary": "A completion task for Google Model Garden Mistral serverless endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. See the Mistral model documentation for instructions on how to construct URLs.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:rawPredict\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample16": { + "summary": "A chat_completion task for Google Model Garden Mistral serverless endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's serverless model hosted on Google Model Garden with single streaming URL provided. See the Mistral model documentation for instructions on how to construct the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample17": { + "summary": "A completion task for Google Model Garden Mistral dedicated endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample18": { + "summary": "A chat_completion task for Google Model Garden Mistral dedicated endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample19": { + "summary": "A completion task for Google Model Garden Mistral shared endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample2": { "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" }, + "PutGoogleVertexAiRequestExample20": { + "summary": "A chat_completion task for Google Model Garden Mistral shared endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single streaming URL provided. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample21": { + "summary": "A completion task for Google Model Garden AI21 serverless endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden serverless endpoint with separate URLs for streaming and non-streaming tasks. See the AI21 model documentation for instructions on how to construct URLs.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:rawPredict\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample22": { + "summary": "A chat_completion task for Google Model Garden AI21 serverless endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden serverless endpoint with single streaming URL provided. See the AI21 model documentation for instructions on how to construct URLs.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, "PutGoogleVertexAiRequestExample3": { - "summary": "A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A completion task for Google Model Garden Anthropic serverless endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. 
See the Anthropic model documentation for instructions on how to construct URLs.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:rawPredict\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample4": { - "summary": "A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A chat_completion task for Google Model Garden Anthropic serverless endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's serverless model hosted on Google Model Garden with single streaming URL provided. See the Anthropic model documentation for instructions on how to construct the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample5": { - "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + "summary": "A completion task for Google Model Garden Meta serverless endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's serverless model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks. 
See the Meta model documentation for instructions on how to construct the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/openapi/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample6": { - "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" + "summary": "A chat_completion task for Google Model Garden Meta serverless endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's serverless model hosted on Google Model Garden with single streaming URL provided. See the Meta model documentation for instructions on how to construct the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/openapi/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample7": { - "summary": "A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/chat/completions\"\n }\n}" + "summary": "A completion task for Google Model Garden Meta dedicated endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-fasttryout.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample8": { - "summary": "A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided", - "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/chat/completions\"\n }\n}" + "summary": "A chat_completion task for Google Model Garden Meta dedicated endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-fasttryout.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample9": { - "summary": "A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks", - "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "summary": "A completion task for Google Model Garden Meta shared endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" } } } @@ -67129,11 +67179,11 @@ ] }, "url": { - "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).\nInformation on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint’s `Sample request` page. The request examples also illustrate the proper formatting for the `url`.", "type": "string" }, "streaming_url": { - "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). 
Others support both operation types through a single URL (e.g., Meta, Hugging Face).\nInformation on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint’s `Sample request` page. The request examples also illustrate the proper formatting for the `streaming_url`.", "type": "string" }, "location": { diff --git a/output/schema/schema.json b/output/schema/schema.json index c5728277c2..43abda361c 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -173498,7 +173498,7 @@ "name": "GoogleModelGardenProvider", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1500-L1507" + "specLocation": "inference/_types/CommonTypes.ts#L1502-L1509" }, { "kind": "interface", @@ -173520,7 +173520,7 @@ } }, { - "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).\nInformation on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint’s `Sample request` page. The request examples also illustrate the proper formatting for the `url`.", "name": "url", "required": false, "type": { @@ -173532,7 +173532,7 @@ } }, { - "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). 
Others support both operation types through a single URL (e.g., Meta, Hugging Face).", + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).\nInformation on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint’s `Sample request` page. The request examples also illustrate the proper formatting for the `streaming_url`.", "name": "streaming_url", "required": false, "type": { @@ -173622,7 +173622,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1433-L1498" + "specLocation": "inference/_types/CommonTypes.ts#L1433-L1500" }, { "kind": "enum", @@ -173635,7 +173635,7 @@ "name": "GoogleVertexAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1548-L1550" + "specLocation": "inference/_types/CommonTypes.ts#L1550-L1552" }, { "kind": "interface", @@ -173697,7 +173697,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1509-L1532" + "specLocation": "inference/_types/CommonTypes.ts#L1511-L1534" }, { "kind": "enum", @@ -173719,7 +173719,7 @@ "name": "GoogleVertexAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1541-L1546" + "specLocation": "inference/_types/CommonTypes.ts#L1543-L1548" }, { "kind": "interface", @@ -173781,7 +173781,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1552-L1584" + "specLocation": "inference/_types/CommonTypes.ts#L1554-L1586" }, { "kind": "enum", @@ -173794,7 +173794,7 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1605-L1607" + "specLocation": "inference/_types/CommonTypes.ts#L1607-L1609" }, { "kind": "interface", @@ -173828,7 +173828,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1586-L1596" + "specLocation": "inference/_types/CommonTypes.ts#L1588-L1598" }, { "kind": "enum", @@ -173850,7 +173850,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1598-L1603" + "specLocation": "inference/_types/CommonTypes.ts#L1600-L1605" }, { "kind": "interface", @@ -175082,7 +175082,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1609-L1638" + "specLocation": "inference/_types/CommonTypes.ts#L1611-L1640" }, { "kind": "enum", @@ -175095,7 +175095,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1668-L1670" + "specLocation": "inference/_types/CommonTypes.ts#L1670-L1672" }, { "kind": "enum", @@ -175114,7 +175114,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1672-L1676" + "specLocation": "inference/_types/CommonTypes.ts#L1674-L1678" }, { "kind": "interface", @@ -175160,7 +175160,7 @@ } } ], - 
"specLocation": "inference/_types/CommonTypes.ts#L1640-L1661" + "specLocation": "inference/_types/CommonTypes.ts#L1642-L1663" }, { "kind": "enum", @@ -175176,7 +175176,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1663-L1666" + "specLocation": "inference/_types/CommonTypes.ts#L1665-L1668" }, { "kind": "enum", @@ -175198,7 +175198,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1678-L1683" + "specLocation": "inference/_types/CommonTypes.ts#L1680-L1685" }, { "kind": "interface", @@ -175270,7 +175270,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1685-L1715" + "specLocation": "inference/_types/CommonTypes.ts#L1687-L1717" }, { "kind": "enum", @@ -175283,7 +175283,7 @@ "name": "LlamaServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1723-L1725" + "specLocation": "inference/_types/CommonTypes.ts#L1725-L1727" }, { "kind": "enum", @@ -175302,7 +175302,7 @@ "name": "LlamaSimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1727-L1731" + "specLocation": "inference/_types/CommonTypes.ts#L1729-L1733" }, { "kind": "enum", @@ -175321,7 +175321,7 @@ "name": "LlamaTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1717-L1721" + "specLocation": "inference/_types/CommonTypes.ts#L1719-L1723" }, { "kind": "interface", @@ -175479,7 +175479,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1733-L1760" + "specLocation": "inference/_types/CommonTypes.ts#L1735-L1762" }, { "kind": "enum", @@ -175492,7 +175492,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1768-L1770" + "specLocation": "inference/_types/CommonTypes.ts#L1770-L1772" }, { "kind": "enum", @@ -175511,7 +175511,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1762-L1766" + "specLocation": "inference/_types/CommonTypes.ts#L1764-L1768" }, { "kind": "interface", @@ -175598,7 +175598,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1772-L1814" + "specLocation": "inference/_types/CommonTypes.ts#L1774-L1816" }, { "kind": "enum", @@ -175611,7 +175611,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1841-L1843" + "specLocation": "inference/_types/CommonTypes.ts#L1843-L1845" }, { "kind": "interface", @@ -175641,7 +175641,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1816-L1833" + "specLocation": "inference/_types/CommonTypes.ts#L1818-L1835" }, { "kind": "enum", @@ -175660,7 +175660,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1835-L1839" + "specLocation": "inference/_types/CommonTypes.ts#L1837-L1841" }, { "kind": "interface", @@ -176506,7 +176506,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1534-L1539" + "specLocation": "inference/_types/CommonTypes.ts#L1536-L1541" }, { "kind": "interface", @@ -176652,7 +176652,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1845-L1876" + "specLocation": "inference/_types/CommonTypes.ts#L1847-L1878" }, { "kind": "enum", @@ -176665,7 +176665,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": 
"inference/_types/CommonTypes.ts#L1909-L1911" + "specLocation": "inference/_types/CommonTypes.ts#L1911-L1913" }, { "kind": "interface", @@ -176725,7 +176725,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1878-L1902" + "specLocation": "inference/_types/CommonTypes.ts#L1880-L1904" }, { "kind": "enum", @@ -176741,7 +176741,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1904-L1907" + "specLocation": "inference/_types/CommonTypes.ts#L1906-L1909" }, { "kind": "interface", @@ -176829,7 +176829,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1913-L1951" + "specLocation": "inference/_types/CommonTypes.ts#L1915-L1953" }, { "kind": "enum", @@ -176842,7 +176842,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1959-L1961" + "specLocation": "inference/_types/CommonTypes.ts#L1961-L1963" }, { "kind": "enum", @@ -176861,7 +176861,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1953-L1957" + "specLocation": "inference/_types/CommonTypes.ts#L1955-L1959" }, { "kind": "request", @@ -180596,22 +180596,64 @@ "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" }, "PutGoogleVertexAiRequestExample10": { - "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden with single streaming URL provided.", - "method_request": "PUT _inference/chat_completion/google_model_garden_mistral_chat_completion", - "summary": "A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden shared endpoint with single streaming URL provided. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "method_request": "PUT _inference/chat_completion/google_model_garden_meta_chat_completion", + "summary": "A chat_completion task for Google Model Garden Meta shared endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample11": { - "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", - "method_request": "PUT _inference/completion/google_model_garden_ai21_completion", - "summary": "A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.", + "method_request": "PUT _inference/completion/google_model_garden_hugging_face_completion", + "summary": "A completion task for Google Model Garden Hugging Face dedicated endpoint with single URL provided for both streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample12": { - "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided.", - "method_request": "PUT _inference/chat_completion/google_model_garden_ai21_chat_completion", - "summary": "A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "method_request": "PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion", + "summary": "A chat_completion task for Google Model Garden Hugging Face dedicated endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample13": { + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.", + "method_request": "PUT _inference/completion/google_model_garden_hugging_face_completion", + "summary": "A completion task for Google Model Garden Hugging Face shared endpoint with single URL provided for both streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample14": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "method_request": "PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion", + "summary": "A chat_completion task for Google Model Garden Hugging Face shared endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample15": { + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. 
See the Mistral model documentation for instructions on how to construct URLs.", + "method_request": "PUT _inference/completion/google_model_garden_mistral_completion", + "summary": "A completion task for Google Model Garden Mistral serverless endpoint with separate URLs for streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:rawPredict\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample16": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's serverless model hosted on Google Model Garden with single streaming URL provided. See the Mistral model documentation for instructions on how to construct the URL.", + "method_request": "PUT _inference/chat_completion/google_model_garden_mistral_chat_completion", + "summary": "A chat_completion task for Google Model Garden Mistral serverless endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample17": { + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.", + "method_request": "PUT _inference/completion/google_model_garden_mistral_completion", + "summary": "A completion task for Google Model Garden Mistral dedicated endpoint with single URL provided for both streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample18": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. 
See the endpoint's `Sample request` page for the variable values used in the URL.", + "method_request": "PUT _inference/chat_completion/google_model_garden_mistral_chat_completion", + "summary": "A chat_completion task for Google Model Garden Mistral dedicated endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample19": { + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.", + "method_request": "PUT _inference/completion/google_model_garden_mistral_completion", + "summary": "A completion task for Google Model Garden Mistral shared endpoint with single URL provided for both streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample2": { "alternatives": [ @@ -180641,47 +180683,65 @@ "summary": "A rerank task", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" }, + "PutGoogleVertexAiRequestExample20": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.", + "method_request": "PUT _inference/chat_completion/google_model_garden_mistral_chat_completion", + "summary": "A chat_completion task for Google Model Garden Mistral shared endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample21": { + "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden serverless endpoint with separate URLs for streaming and non-streaming tasks. 
See the AI21 model documentation for instructions on how to construct URLs.", + "method_request": "PUT _inference/completion/google_model_garden_ai21_completion", + "summary": "A completion task for Google Model Garden AI21 serverless endpoint with separate URLs for streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:rawPredict\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample22": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden serverless endpoint with single streaming URL provided. See the AI21 model documentation for instructions on how to construct URLs.", + "method_request": "PUT _inference/chat_completion/google_model_garden_ai21_chat_completion", + "summary": "A chat_completion task for Google Model Garden AI21 serverless endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict\"\n }\n}" + }, "PutGoogleVertexAiRequestExample3": { - "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. 
See the Anthropic model documentation for instructions on how to construct URLs.", "method_request": "PUT _inference/completion/google_model_garden_anthropic_completion", - "summary": "A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A completion task for Google Model Garden Anthropic serverless endpoint with separate URLs for streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:rawPredict\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample4": { - "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided.", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's serverless model hosted on Google Model Garden with single streaming URL provided. 
See the Anthropic model documentation for instructions on how to construct the URL.", "method_request": "PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion", - "summary": "A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A chat_completion task for Google Model Garden Anthropic serverless endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample5": { - "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's serverless model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks. See the Meta model documentation for instructions on how to construct the URL.", "method_request": "PUT _inference/completion/google_model_garden_meta_completion", - "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + "summary": "A completion task for Google Model Garden Meta serverless endpoint with single URL provided for both streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/openapi/chat/completions\"\n }\n}" }, "PutGoogleVertexAiRequestExample6": { - "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's serverless model hosted on Google Model Garden with single streaming URL provided. 
See the Meta model documentation for instructions on how to construct the URL.",
           "method_request": "PUT _inference/chat_completion/google_model_garden_meta_chat_completion",
-          "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided",
-          "value": "{\n  \"service\": \"googlevertexai\",\n  \"service_settings\": {\n    \"provider\": \"meta\",\n    \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n    \"service_account_json\": \"service-account-json\",\n    \"streaming_url\": \"https://url/openapi/chat/completions\"\n  }\n}"
+          "summary": "A chat_completion task for Google Model Garden Meta serverless endpoint with single streaming URL provided",
+          "value": "{\n  \"service\": \"googlevertexai\",\n  \"service_settings\": {\n    \"provider\": \"meta\",\n    \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n    \"service_account_json\": \"service-account-json\",\n    \"streaming_url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/openapi/chat/completions\"\n  }\n}"
         },
         "PutGoogleVertexAiRequestExample7": {
-          "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.",
-          "method_request": "PUT _inference/completion/google_model_garden_hugging_face_completion",
-          "summary": "A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks",
-          "value": "{\n  \"service\": \"googlevertexai\",\n  \"service_settings\": {\n    \"provider\": \"hugging_face\",\n    \"service_account_json\": \"service-account-json\",\n    \"url\": \"https://url/chat/completions\"\n  }\n}"
+          "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.",
+          "method_request": "PUT _inference/completion/google_model_garden_meta_completion",
+          "summary": "A completion task for Google Model Garden Meta dedicated endpoint with single URL provided for both streaming and non-streaming tasks",
+          "value": "{\n  \"service\": \"googlevertexai\",\n  \"service_settings\": {\n    \"provider\": \"meta\",\n    \"service_account_json\": \"service-account-json\",\n    \"url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-fasttryout.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n  }\n}"
         },
         "PutGoogleVertexAiRequestExample8": {
-          "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided.",
-          "method_request": "PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion",
-          "summary": "A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided",
-          "value": "{\n  \"service\": \"googlevertexai\",\n  \"service_settings\": {\n    \"provider\": \"hugging_face\",\n    \"service_account_json\": \"service-account-json\",\n    \"streaming_url\": \"https://url/chat/completions\"\n  }\n}"
+          "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.",
+          "method_request": "PUT _inference/chat_completion/google_model_garden_meta_chat_completion",
+          "summary": "A chat_completion task for Google Model Garden Meta dedicated endpoint with single streaming URL provided",
+          "value": "{\n  \"service\": \"googlevertexai\",\n  \"service_settings\": {\n    \"provider\": \"meta\",\n    \"service_account_json\": \"service-account-json\",\n    \"streaming_url\": \"https://%ENDPOINT_ID%.%LOCATION_ID%-fasttryout.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n  }\n}"
         },
         "PutGoogleVertexAiRequestExample9": {
-          "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.",
-          "method_request": "PUT _inference/completion/google_model_garden_mistral_completion",
-          "summary": "A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks",
-          "value": "{\n  \"service\": \"googlevertexai\",\n  \"service_settings\": {\n    \"provider\": \"mistral\",\n    \"model_id\": \"mistral-small-2503\",\n    \"service_account_json\": \"service-account-json\",\n    \"url\": \"https://url:rawPredict\",\n    \"streaming_url\": \"https://url:streamRawPredict\"\n  }\n}"
+          "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.",
+          "method_request": "PUT _inference/completion/google_model_garden_meta_completion",
+          "summary": "A completion task for Google Model Garden Meta shared endpoint with single URL provided for both streaming and non-streaming tasks",
+          "value": "{\n  \"service\": \"googlevertexai\",\n  \"service_settings\": {\n    \"provider\": \"meta\",\n    \"service_account_json\": \"service-account-json\",\n    \"url\": \"https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions\"\n  }\n}"
         }
       },
       "inherits": {
diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts
index d404b62dc0..6dc52f2cef 100644
--- a/specification/inference/_types/CommonTypes.ts
+++ b/specification/inference/_types/CommonTypes.ts
@@ -1446,6 +1446,7 @@ export class GoogleVertexAIServiceSettings {
    * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).
    * At least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.
    * Certain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).
+   * Information on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint's `Sample request` page. The request examples also illustrate the proper formatting for the `url`.
    */
   url?: string
   /**
@@ -1455,6 +1456,7 @@
    * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).
    * At least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.
    * Certain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).
+   * Information on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint's `Sample request` page. The request examples also illustrate the proper formatting for the `streaming_url`.
    */
   streaming_url?: string
   /**
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml
index ad2ed0183a..cd0e0cccea 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml
@@ -1,14 +1,13 @@
-summary: A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided
-description: Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden with single streaming URL provided.
-method_request: 'PUT _inference/chat_completion/google_model_garden_mistral_chat_completion'
+summary: A chat_completion task for Google Model Garden Meta shared endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_meta_chat_completion'
 # type: "request"
 value: |-
   {
     "service": "googlevertexai",
     "service_settings": {
-      "provider": "mistral",
-      "model_id": "mistral-small-2503",
+      "provider": "meta",
       "service_account_json": "service-account-json",
-      "streaming_url": "https://url:streamRawPredict"
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
     }
   }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml
index 00a076c727..9faa3c3b5d 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml
@@ -1,14 +1,13 @@
-summary: A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks
-description: Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.
-method_request: 'PUT _inference/completion/google_model_garden_ai21_completion'
+summary: A completion task for Google Model Garden Hugging Face dedicated endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_hugging_face_completion'
 # type: "request"
 value: |-
   {
     "service": "googlevertexai",
     "service_settings": {
-      "provider": "ai21",
+      "provider": "hugging_face",
       "service_account_json": "service-account-json",
-      "url": "https://url:rawPredict",
-      "streaming_url": "https://url:streamRawPredict"
+      "url": "https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
     }
   }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml
index ceadc43079..f766cb7abb 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml
@@ -1,13 +1,13 @@
-summary: A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided
-description: Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided.
-method_request: 'PUT _inference/chat_completion/google_model_garden_ai21_chat_completion'
+summary: A chat_completion task for Google Model Garden Hugging Face dedicated endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion'
 # type: "request"
 value: |-
   {
     "service": "googlevertexai",
     "service_settings": {
-      "provider": "ai21",
+      "provider": "hugging_face",
       "service_account_json": "service-account-json",
-      "streaming_url": "https://url:streamRawPredict"
+      "streaming_url": "https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
     }
   }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample13.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample13.yaml
new file mode 100644
index 0000000000..e6e690f9bd
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample13.yaml
@@ -0,0 +1,13 @@
+summary: A completion task for Google Model Garden Hugging Face shared endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_hugging_face_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "hugging_face",
+      "service_account_json": "service-account-json",
+      "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample14.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample14.yaml
new file mode 100644
index 0000000000..19de6d9efb
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample14.yaml
@@ -0,0 +1,13 @@
+summary: A chat_completion task for Google Model Garden Hugging Face shared endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "hugging_face",
+      "service_account_json": "service-account-json",
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample15.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample15.yaml
new file mode 100644
index 0000000000..b49fd214db
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample15.yaml
@@ -0,0 +1,15 @@
+summary: A completion task for Google Model Garden Mistral serverless endpoint with separate URLs for streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. See the Mistral model documentation for instructions on how to construct URLs.
+method_request: 'PUT _inference/completion/google_model_garden_mistral_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "mistral",
+      "model_id": "mistral-small-2503",
+      "service_account_json": "service-account-json",
+      "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:rawPredict",
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample16.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample16.yaml
new file mode 100644
index 0000000000..fcaba9ff26
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample16.yaml
@@ -0,0 +1,14 @@
+summary: A chat_completion task for Google Model Garden Mistral serverless endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's serverless model hosted on Google Model Garden with single streaming URL provided. See the Mistral model documentation for instructions on how to construct the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_mistral_chat_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "mistral",
+      "model_id": "mistral-small-2503",
+      "service_account_json": "service-account-json",
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample17.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample17.yaml
new file mode 100644
index 0000000000..5463166837
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample17.yaml
@@ -0,0 +1,13 @@
+summary: A completion task for Google Model Garden Mistral dedicated endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_mistral_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "mistral",
+      "service_account_json": "service-account-json",
+      "url": "https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample18.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample18.yaml
new file mode 100644
index 0000000000..a749a47c80
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample18.yaml
@@ -0,0 +1,13 @@
+summary: A chat_completion task for Google Model Garden Mistral dedicated endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_mistral_chat_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "mistral",
+      "service_account_json": "service-account-json",
+      "streaming_url": "https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample19.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample19.yaml
new file mode 100644
index 0000000000..112966f9d4
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample19.yaml
@@ -0,0 +1,13 @@
+summary: A completion task for Google Model Garden Mistral shared endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_mistral_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "mistral",
+      "service_account_json": "service-account-json",
+      "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample20.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample20.yaml
new file mode 100644
index 0000000000..1bdafc14ac
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample20.yaml
@@ -0,0 +1,13 @@
+summary: A chat_completion task for Google Model Garden Mistral shared endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_mistral_chat_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "mistral",
+      "service_account_json": "service-account-json",
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample21.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample21.yaml
new file mode 100644
index 0000000000..c7dc7f0b04
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample21.yaml
@@ -0,0 +1,14 @@
+summary: A completion task for Google Model Garden AI21 serverless endpoint with separate URLs for streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden serverless endpoint with separate URLs for streaming and non-streaming tasks. See the AI21 model documentation for instructions on how to construct URLs.
+method_request: 'PUT _inference/completion/google_model_garden_ai21_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "ai21",
+      "service_account_json": "service-account-json",
+      "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:rawPredict",
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample22.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample22.yaml
new file mode 100644
index 0000000000..ef365f73dd
--- /dev/null
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample22.yaml
@@ -0,0 +1,13 @@
+summary: A chat_completion task for Google Model Garden AI21 serverless endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden serverless endpoint with single streaming URL provided. See the AI21 model documentation for instructions on how to construct URLs.
+method_request: 'PUT _inference/chat_completion/google_model_garden_ai21_chat_completion'
+# type: "request"
+value: |-
+  {
+    "service": "googlevertexai",
+    "service_settings": {
+      "provider": "ai21",
+      "service_account_json": "service-account-json",
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict"
+    }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml
index 224eec318a..dd4026665d 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml
@@ -1,5 +1,5 @@
-summary: A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks
-description: Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.
+summary: A completion task for Google Model Garden Anthropic serverless endpoint with separate URLs for streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. See the Anthropic model documentation for instructions on how to construct URLs.
 method_request: 'PUT _inference/completion/google_model_garden_anthropic_completion'
 # type: "request"
 value: |-
@@ -8,8 +8,8 @@ value: |-
     "service_settings": {
       "provider": "anthropic",
       "service_account_json": "service-account-json",
-      "url": "https://url:rawPredict",
-      "streaming_url": "https://streaming_url:streamRawPredict"
+      "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:rawPredict",
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict"
     },
     "task_settings": {
       "max_tokens": 128
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml
index a8ae043de1..058f60f3a2 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml
@@ -1,5 +1,5 @@
-summary: A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided
-description: Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided.
+summary: A chat_completion task for Google Model Garden Anthropic serverless endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's serverless model hosted on Google Model Garden with single streaming URL provided. See the Anthropic model documentation for instructions on how to construct the URL.
 method_request: 'PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion'
 # type: "request"
 value: |-
@@ -8,7 +8,7 @@ value: |-
     "service_settings": {
       "provider": "anthropic",
       "service_account_json": "service-account-json",
-      "streaming_url": "https://streaming_url:streamRawPredict"
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict"
     },
     "task_settings": {
       "max_tokens": 128
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml
index 19f4ce4de5..e7c86dc760 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml
@@ -1,5 +1,5 @@
-summary: A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks
-description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.
+summary: A completion task for Google Model Garden Meta serverless endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's serverless model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks. See the Meta model documentation for instructions on how to construct the URL.
 method_request: 'PUT _inference/completion/google_model_garden_meta_completion'
 # type: "request"
 value: |-
@@ -9,6 +9,6 @@ value: |-
       "provider": "meta",
       "model_id": "meta/llama-3.3-70b-instruct-maas",
       "service_account_json": "service-account-json",
-      "url": "https://url/openapi/chat/completions"
+      "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/openapi/chat/completions"
     }
   }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml
index 8d38a23ca5..0e3241b4ae 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml
@@ -1,5 +1,5 @@
-summary: A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided
-description: Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.
+summary: A chat_completion task for Google Model Garden Meta serverless endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's serverless model hosted on Google Model Garden with single streaming URL provided. See the Meta model documentation for instructions on how to construct the URL.
 method_request: 'PUT _inference/chat_completion/google_model_garden_meta_chat_completion'
 # type: "request"
 value: |-
@@ -9,6 +9,6 @@ value: |-
       "provider": "meta",
       "model_id": "meta/llama-3.3-70b-instruct-maas",
       "service_account_json": "service-account-json",
-      "streaming_url": "https://url/openapi/chat/completions"
+      "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/openapi/chat/completions"
     }
   }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml
index e503c99756..c7a68ce82a 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml
@@ -1,13 +1,13 @@
-summary: A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks
-description: Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.
-method_request: 'PUT _inference/completion/google_model_garden_hugging_face_completion'
+summary: A completion task for Google Model Garden Meta dedicated endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_meta_completion'
 # type: "request"
 value: |-
   {
     "service": "googlevertexai",
     "service_settings": {
-      "provider": "hugging_face",
+      "provider": "meta",
       "service_account_json": "service-account-json",
-      "url": "https://url/chat/completions"
+      "url": "https://%ENDPOINT_ID%.%LOCATION_ID%-fasttryout.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
     }
   }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml
index 59c9c14010..35810d8992 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml
@@ -1,13 +1,13 @@
-summary: A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided
-description: Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided.
-method_request: 'PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion'
+summary: A chat_completion task for Google Model Garden Meta dedicated endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_meta_chat_completion'
 # type: "request"
 value: |-
   {
     "service": "googlevertexai",
     "service_settings": {
-      "provider": "hugging_face",
+      "provider": "meta",
       "service_account_json": "service-account-json",
-      "streaming_url": "https://url/chat/completions"
+      "streaming_url": "https://%ENDPOINT_ID%.%LOCATION_ID%-fasttryout.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
     }
   }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml
index 17c69126b5..682fa174a7 100644
--- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml
+++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml
@@ -1,15 +1,13 @@
-summary: A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks
-description: Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.
-method_request: 'PUT _inference/completion/google_model_garden_mistral_completion'
+summary: A completion task for Google Model Garden Meta shared endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_meta_completion'
 # type: "request"
 value: |-
   {
     "service": "googlevertexai",
     "service_settings": {
-      "provider": "mistral",
-      "model_id": "mistral-small-2503",
+      "provider": "meta",
       "service_account_json": "service-account-json",
-      "url": "https://url:rawPredict",
-      "streaming_url": "https://url:streamRawPredict"
+      "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
    }
   }