diff --git a/specification/ai/ModelInference/examples/2025-04-01/GetChatCompletions_AudioModality_Gen.json b/specification/ai/ModelInference/examples/2025-04-01/GetChatCompletions_AudioModality_Gen.json
deleted file mode 100644
index 01ebe10c8908..000000000000
--- a/specification/ai/ModelInference/examples/2025-04-01/GetChatCompletions_AudioModality_Gen.json
+++ /dev/null
@@ -1,99 +0,0 @@
-{
-  "title": "Audio modality chat completion",
-  "operationId": "GetChatCompletions",
-  "parameters": {
-    "api-version": "2025-04-01",
-    "extra-parameters": "error",
-    "body": {
-      "modalities": [
-        "text",
-        "audio"
-      ],
-      "messages": [
-        {
-          "role": "system",
-          "content": "You are a helpful assistant"
-        },
-        {
-          "role": "user",
-          "content": [
-            {
-              "type": "input_audio",
-              "input_audio": {
-                "data": "<base64 encoded audio data>",
-                "format": "wav"
-              }
-            }
-          ]
-        },
-        {
-          "role": "assistant",
-          "content": null,
-          "audio": {
-            "id": "abcdef1234"
-          }
-        },
-        {
-          "role": "user",
-          "content": [
-            {
-              "type": "input_audio",
-              "input_audio": {
-                "data": "<base64 encoded audio data>",
-                "format": "wav"
-              }
-            }
-          ]
-        }
-      ],
-      "frequency_penalty": 0,
-      "presence_penalty": 0,
-      "temperature": 0,
-      "top_p": 0,
-      "seed": 21,
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "kgousajxgzyhugvqekuswuqbk",
-        "object": "chat.completion",
-        "created": 1696522361,
-        "model": "my-model-name",
-        "usage": {
-          "completion_tokens": 19,
-          "prompt_tokens": 28,
-          "total_tokens": 16,
-          "completion_tokens_details": {
-            "audio_tokens": 5,
-            "reasoning_tokens": 0,
-            "total_tokens": 5
-          },
-          "prompt_tokens_details": {
-            "audio_tokens": 10,
-            "cached_tokens": 0
-          }
-        },
-        "choices": [
-          {
-            "index": 0,
-            "finish_reason": "stop",
-            "message": {
-              "role": "assistant",
-              "content": null,
-              "tool_calls": null,
-              "audio": {
-                "id": "abcdef1234",
-                "format": "wav",
-                "data": "<base64 encoded audio data>",
-                "expires_at": 1896522361,
-                "transcript": "This is a sample transcript"
-              }
-            }
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-04-01/GetChatCompletions_MaximumSet_Gen.json b/specification/ai/ModelInference/examples/2025-04-01/GetChatCompletions_MaximumSet_Gen.json
deleted file mode 100644
index 1ce6d85f1c7e..000000000000
--- a/specification/ai/ModelInference/examples/2025-04-01/GetChatCompletions_MaximumSet_Gen.json
+++ /dev/null
@@ -1,95 +0,0 @@
-{
-  "title": "maximum set chat completion",
-  "operationId": "GetChatCompletions",
-  "parameters": {
-    "api-version": "2025-04-01",
-    "extra-parameters": "error",
-    "body": {
-      "modalities": [
-        "text"
-      ],
-      "messages": [
-        {
-          "role": "system",
-          "content": "You are a helpful assistant"
-        },
-        {
-          "role": "user",
-          "content": "Explain Riemann's conjecture"
-        },
-        {
-          "role": "assistant",
-          "content": "The Riemann Conjecture is a deep mathematical conjecture around prime numbers and how they can be predicted. It was first published in Riemann's groundbreaking 1859 paper. The conjecture states that the Riemann zeta function has its zeros only at the negative even integers and complex numbers with real part 1/21. Many consider it to be the most important unsolved problem in pure mathematics. The Riemann hypothesis is a way to predict the probability that numbers in a certain range are prime that was also devised by German mathematician Bernhard Riemann in 18594."
-        },
-        {
-          "role": "user",
-          "content": "Ist it proved?"
-        }
-      ],
-      "frequency_penalty": 0,
-      "stream": true,
-      "presence_penalty": 0,
-      "temperature": 0,
-      "top_p": 0,
-      "max_tokens": 255,
-      "response_format": {
-        "type": "text"
-      },
-      "stop": [
-        "<|endoftext|>"
-      ],
-      "tools": [
-        {
-          "type": "function",
-          "function": {
-            "name": "my-function-name",
-            "description": "A function useful to know if a theroem is proved or not"
-          }
-        }
-      ],
-      "seed": 21,
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "kgousajxgzyhugvqekuswuqbk",
-        "object": "chat.completion",
-        "created": 18,
-        "model": "my-model-name",
-        "usage": {
-          "completion_tokens": 19,
-          "prompt_tokens": 28,
-          "total_tokens": 16,
-          "completion_tokens_details": {
-            "audio_tokens": 5,
-            "reasoning_tokens": 0,
-            "total_tokens": 5
-          }
-        },
-        "choices": [
-          {
-            "index": 7,
-            "finish_reason": "stop",
-            "message": {
-              "role": "assistant",
-              "content": null,
-              "reasoning_content": null,
-              "tool_calls": [
-                {
-                  "id": "yrobmilsrugmbwukmzo",
-                  "type": "function",
-                  "function": {
-                    "name": "my-function-name",
-                    "arguments": "{ \"arg1\": \"value1\", \"arg2\": \"value2\" }"
-                  }
-                }
-              ]
-            }
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-04-01/GetChatCompletions_MinimumSet_Gen.json b/specification/ai/ModelInference/examples/2025-04-01/GetChatCompletions_MinimumSet_Gen.json
deleted file mode 100644
index 75fce220d10e..000000000000
--- a/specification/ai/ModelInference/examples/2025-04-01/GetChatCompletions_MinimumSet_Gen.json
+++ /dev/null
@@ -1,40 +0,0 @@
-{
-  "title": "minimum set chat completion",
-  "operationId": "GetChatCompletions",
-  "parameters": {
-    "api-version": "2025-04-01",
-    "body": {
-      "messages": [
-        {
-          "role": "user",
-          "content": "Explain Riemann's conjecture"
-        }
-      ]
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "kgousajxgzyhugvqekuswuqbk",
-        "object": "chat.completion",
-        "created": 1234567890,
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 205,
-          "completion_tokens": 5,
-          "total_tokens": 210
-        },
-        "choices": [
-          {
-            "index": 0,
-            "finish_reason": "stop",
-            "message": {
-              "role": "assistant",
-              "content": "The Riemann Conjecture is a deep mathematical conjecture around prime numbers and how they can be predicted. It was first published in Riemann's groundbreaking 1859 paper. The conjecture states that the Riemann zeta function has its zeros only at the negative even integers and complex numbers with real part 1/21. Many consider it to be the most important unsolved problem in pure mathematics. The Riemann hypothesis is a way to predict the probability that numbers in a certain range are prime that was also devised by German mathematician Bernhard Riemann in 18594"
-            }
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-04-01/GetEmbeddings_MaximumSet_Gen.json b/specification/ai/ModelInference/examples/2025-04-01/GetEmbeddings_MaximumSet_Gen.json
deleted file mode 100644
index 67e4b8a1440e..000000000000
--- a/specification/ai/ModelInference/examples/2025-04-01/GetEmbeddings_MaximumSet_Gen.json
+++ /dev/null
@@ -1,50 +0,0 @@
-{
-  "title": "maximum set embeddings",
-  "operationId": "GetEmbeddings",
-  "parameters": {
-    "api-version": "2025-04-01",
-    "extra-parameters": "error",
-    "body": {
-      "input": [
-        "This is a very good text"
-      ],
-      "dimensions": 1024,
-      "encoding_format": "float",
-      "input_type": "text",
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-04-01/GetEmbeddings_MinimumSet_Gen.json b/specification/ai/ModelInference/examples/2025-04-01/GetEmbeddings_MinimumSet_Gen.json
deleted file mode 100644
index 7225ec8a8c14..000000000000
--- a/specification/ai/ModelInference/examples/2025-04-01/GetEmbeddings_MinimumSet_Gen.json
+++ /dev/null
@@ -1,45 +0,0 @@
-{
-  "title": "minimum set embeddings",
-  "operationId": "GetEmbeddings",
-  "parameters": {
-    "api-version": "2025-04-01",
-    "body": {
-      "input": [
-        "This is a very good text"
-      ]
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-04-01/GetImageEmbeddings_MaximumSet_Gen.json b/specification/ai/ModelInference/examples/2025-04-01/GetImageEmbeddings_MaximumSet_Gen.json
deleted file mode 100644
index 7bf59bb84883..000000000000
--- a/specification/ai/ModelInference/examples/2025-04-01/GetImageEmbeddings_MaximumSet_Gen.json
+++ /dev/null
@@ -1,53 +0,0 @@
-{
-  "title": "maximum set image embeddings",
-  "operationId": "GetImageEmbeddings",
-  "parameters": {
-    "api-version": "2025-04-01",
-    "extra-parameters": "error",
-    "body": {
-      "input": [
-        {
-          "image": "puqkvvlvgcjyzughesnkena",
-          "text": "azrzyjsmnuefqpowpvfmyobeehqsni"
-        }
-      ],
-      "dimensions": 1024,
-      "encoding_format": "float",
-      "input_type": "text",
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-04-01/GetImageEmbeddings_MinimumSet_Gen.json b/specification/ai/ModelInference/examples/2025-04-01/GetImageEmbeddings_MinimumSet_Gen.json
deleted file mode 100644
index c9963a729a2b..000000000000
--- a/specification/ai/ModelInference/examples/2025-04-01/GetImageEmbeddings_MinimumSet_Gen.json
+++ /dev/null
@@ -1,47 +0,0 @@
-{
-  "title": "minimum set image embeddings",
-  "operationId": "GetImageEmbeddings",
-  "parameters": {
-    "api-version": "2025-04-01",
-    "body": {
-      "input": [
-        {
-          "image": "gvmojtfooxixxzayrditjlyymg"
-        }
-      ]
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-04-01/GetModelInfo_MaximumSet_Gen.json b/specification/ai/ModelInference/examples/2025-04-01/GetModelInfo_MaximumSet_Gen.json
deleted file mode 100644
index 6a94e8c02b7a..000000000000
--- a/specification/ai/ModelInference/examples/2025-04-01/GetModelInfo_MaximumSet_Gen.json
+++ /dev/null
@@ -1,17 +0,0 @@
-{
-  "title": "maximum set model information",
-  "operationId": "GetModelInfo",
-  "parameters": {
-    "api-version": "2025-04-01",
-    "model": "Phi-3.5-mini-instruct"
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "model_name": "Phi-3.5-mini-instruct",
-        "model_type": "chat-completion",
-        "model_provider_name": "Microsoft"
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-04-01/GetModelInfo_MinimumSet_Gen.json b/specification/ai/ModelInference/examples/2025-04-01/GetModelInfo_MinimumSet_Gen.json
deleted file mode 100644
index c5c343992c7d..000000000000
--- a/specification/ai/ModelInference/examples/2025-04-01/GetModelInfo_MinimumSet_Gen.json
+++ /dev/null
@@ -1,17 +0,0 @@
-{
-  "title": "minimum set model information",
-  "operationId": "GetModelInfo",
-  "parameters": {
-    "api-version": "2025-04-01",
-    "model": "Phi-3.5-mini-instruct"
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "model_name": "Phi-3.5-mini-instruct",
-        "model_type": "chat-completion",
-        "model_provider_name": "Microsoft"
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-01/GetChatCompletions_AudioModality_Gen.json b/specification/ai/ModelInference/examples/2025-05-01/GetChatCompletions_AudioModality_Gen.json
deleted file mode 100644
index 5434af06078d..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-01/GetChatCompletions_AudioModality_Gen.json
+++ /dev/null
@@ -1,99 +0,0 @@
-{
-  "title": "Chat completion with audio content",
-  "operationId": "GetChatCompletions",
-  "parameters": {
-    "api-version": "2025-05-01",
-    "extra-parameters": "error",
-    "body": {
-      "modalities": [
-        "text",
-        "audio"
-      ],
-      "messages": [
-        {
-          "role": "system",
-          "content": "You are a helpful assistant"
-        },
-        {
-          "role": "user",
-          "content": [
-            {
-              "type": "input_audio",
-              "input_audio": {
-                "data": "<base64 encoded audio data>",
-                "format": "wav"
-              }
-            }
-          ]
-        },
-        {
-          "role": "assistant",
-          "content": null,
-          "audio": {
-            "id": "abcdef1234"
-          }
-        },
-        {
-          "role": "user",
-          "content": [
-            {
-              "type": "input_audio",
-              "input_audio": {
-                "data": "<base64 encoded audio data>",
-                "format": "wav"
-              }
-            }
-          ]
-        }
-      ],
-      "frequency_penalty": 0,
-      "presence_penalty": 0,
-      "temperature": 0,
-      "top_p": 0,
-      "seed": 21,
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "kgousajxgzyhugvqekuswuqbk",
-        "object": "chat.completion",
-        "created": 1696522361,
-        "model": "my-model-name",
-        "usage": {
-          "completion_tokens": 19,
-          "prompt_tokens": 28,
-          "total_tokens": 16,
-          "completion_tokens_details": {
-            "audio_tokens": 5,
-            "reasoning_tokens": 0,
-            "total_tokens": 5
-          },
-          "prompt_tokens_details": {
-            "audio_tokens": 10,
-            "cached_tokens": 0
-          }
-        },
-        "choices": [
-          {
-            "index": 0,
-            "finish_reason": "stop",
-            "message": {
-              "role": "assistant",
-              "content": null,
-              "tool_calls": null,
-              "audio": {
-                "id": "abcdef1234",
-                "format": "wav",
-                "data": "<base64 encoded audio data>",
-                "expires_at": 1896522361,
-                "transcript": "This is a sample transcript"
-              }
-            }
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-01/GetChatCompletions_MaximumSet_Gen.json b/specification/ai/ModelInference/examples/2025-05-01/GetChatCompletions_MaximumSet_Gen.json
deleted file mode 100644
index 4d154c3a9c2c..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-01/GetChatCompletions_MaximumSet_Gen.json
+++ /dev/null
@@ -1,95 +0,0 @@
-{
-  "title": "Chat completion with multiple parameters and chat history",
-  "operationId": "GetChatCompletions",
-  "parameters": {
-    "api-version": "2025-05-01",
-    "extra-parameters": "error",
-    "body": {
-      "modalities": [
-        "text"
-      ],
-      "messages": [
-        {
-          "role": "system",
-          "content": "You are a helpful assistant"
-        },
-        {
-          "role": "user",
-          "content": "Explain Riemann's conjecture"
-        },
-        {
-          "role": "assistant",
-          "content": "The Riemann Conjecture is a deep mathematical conjecture around prime numbers and how they can be predicted. It was first published in Riemann's groundbreaking 1859 paper. The conjecture states that the Riemann zeta function has its zeros only at the negative even integers and complex numbers with real part 1/21. Many consider it to be the most important unsolved problem in pure mathematics. The Riemann hypothesis is a way to predict the probability that numbers in a certain range are prime that was also devised by German mathematician Bernhard Riemann in 18594."
-        },
-        {
-          "role": "user",
-          "content": "Ist it proved?"
-        }
-      ],
-      "frequency_penalty": 0,
-      "stream": true,
-      "presence_penalty": 0,
-      "temperature": 0,
-      "top_p": 0,
-      "max_tokens": 255,
-      "response_format": {
-        "type": "text"
-      },
-      "stop": [
-        "<|endoftext|>"
-      ],
-      "tools": [
-        {
-          "type": "function",
-          "function": {
-            "name": "my-function-name",
-            "description": "A function useful to know if a theroem is proved or not"
-          }
-        }
-      ],
-      "seed": 21,
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "kgousajxgzyhugvqekuswuqbk",
-        "object": "chat.completion",
-        "created": 18,
-        "model": "my-model-name",
-        "usage": {
-          "completion_tokens": 19,
-          "prompt_tokens": 28,
-          "total_tokens": 16,
-          "completion_tokens_details": {
-            "audio_tokens": 5,
-            "reasoning_tokens": 0,
-            "total_tokens": 5
-          }
-        },
-        "choices": [
-          {
-            "index": 7,
-            "finish_reason": "stop",
-            "message": {
-              "role": "assistant",
-              "content": null,
-              "reasoning_content": null,
-              "tool_calls": [
-                {
-                  "id": "yrobmilsrugmbwukmzo",
-                  "type": "function",
-                  "function": {
-                    "name": "my-function-name",
-                    "arguments": "{ \"arg1\": \"value1\", \"arg2\": \"value2\" }"
-                  }
-                }
-              ]
-            }
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-01/GetChatCompletions_MinimumSet_Gen.json b/specification/ai/ModelInference/examples/2025-05-01/GetChatCompletions_MinimumSet_Gen.json
deleted file mode 100644
index 9e35a49d9c96..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-01/GetChatCompletions_MinimumSet_Gen.json
+++ /dev/null
@@ -1,40 +0,0 @@
-{
-  "title": "Simple chat completion",
-  "operationId": "GetChatCompletions",
-  "parameters": {
-    "api-version": "2025-05-01",
-    "body": {
-      "messages": [
-        {
-          "role": "user",
-          "content": "Explain Riemann's conjecture"
-        }
-      ]
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "kgousajxgzyhugvqekuswuqbk",
-        "object": "chat.completion",
-        "created": 1234567890,
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 205,
-          "completion_tokens": 5,
-          "total_tokens": 210
-        },
-        "choices": [
-          {
-            "index": 0,
-            "finish_reason": "stop",
-            "message": {
-              "role": "assistant",
-              "content": "The Riemann Conjecture is a deep mathematical conjecture around prime numbers and how they can be predicted. It was first published in Riemann's groundbreaking 1859 paper. The conjecture states that the Riemann zeta function has its zeros only at the negative even integers and complex numbers with real part 1/21. Many consider it to be the most important unsolved problem in pure mathematics. The Riemann hypothesis is a way to predict the probability that numbers in a certain range are prime that was also devised by German mathematician Bernhard Riemann in 18594"
-            }
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-01/GetEmbeddings_MaximumSet_Gen.json b/specification/ai/ModelInference/examples/2025-05-01/GetEmbeddings_MaximumSet_Gen.json
deleted file mode 100644
index 9247e0436dc6..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-01/GetEmbeddings_MaximumSet_Gen.json
+++ /dev/null
@@ -1,50 +0,0 @@
-{
-  "title": "Create text embeddings with dimension, encoding, and input type",
-  "operationId": "GetEmbeddings",
-  "parameters": {
-    "api-version": "2025-05-01",
-    "extra-parameters": "error",
-    "body": {
-      "input": [
-        "This is a very good text"
-      ],
-      "dimensions": 1024,
-      "encoding_format": "float",
-      "input_type": "text",
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-01/GetEmbeddings_MinimumSet_Gen.json b/specification/ai/ModelInference/examples/2025-05-01/GetEmbeddings_MinimumSet_Gen.json
deleted file mode 100644
index 59cfd528c2a1..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-01/GetEmbeddings_MinimumSet_Gen.json
+++ /dev/null
@@ -1,45 +0,0 @@
-{
-  "title": "Create text embeddings",
-  "operationId": "GetEmbeddings",
-  "parameters": {
-    "api-version": "2025-05-01",
-    "body": {
-      "input": [
-        "This is a very good text"
-      ]
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-01/GetImageEmbeddings_MaximumSet_Gen.json b/specification/ai/ModelInference/examples/2025-05-01/GetImageEmbeddings_MaximumSet_Gen.json
deleted file mode 100644
index 3df9cb9ed8c4..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-01/GetImageEmbeddings_MaximumSet_Gen.json
+++ /dev/null
@@ -1,53 +0,0 @@
-{
-  "title": "Create image embeddings from text and image pair, and encoding format",
-  "operationId": "GetImageEmbeddings",
-  "parameters": {
-    "api-version": "2025-05-01",
-    "extra-parameters": "error",
-    "body": {
-      "input": [
-        {
-          "image": "puqkvvlvgcjyzughesnkena",
-          "text": "azrzyjsmnuefqpowpvfmyobeehqsni"
-        }
-      ],
-      "dimensions": 1024,
-      "encoding_format": "float",
-      "input_type": "text",
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-01/GetImageEmbeddings_MinimumSet_Gen.json b/specification/ai/ModelInference/examples/2025-05-01/GetImageEmbeddings_MinimumSet_Gen.json
deleted file mode 100644
index 7c686097d492..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-01/GetImageEmbeddings_MinimumSet_Gen.json
+++ /dev/null
@@ -1,47 +0,0 @@
-{
-  "title": "Generate image embeddings",
-  "operationId": "GetImageEmbeddings",
-  "parameters": {
-    "api-version": "2025-05-01",
-    "body": {
-      "input": [
-        {
-          "image": "gvmojtfooxixxzayrditjlyymg"
-        }
-      ]
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-01/GetModelInfo_MaximumSet_Gen.json b/specification/ai/ModelInference/examples/2025-05-01/GetModelInfo_MaximumSet_Gen.json
deleted file mode 100644
index f6012f69291a..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-01/GetModelInfo_MaximumSet_Gen.json
+++ /dev/null
@@ -1,17 +0,0 @@
-{
-  "title": "Get model information for a model deployment",
-  "operationId": "GetModelInfo",
-  "parameters": {
-    "api-version": "2025-05-01",
-    "model": "Phi-3.5-mini-instruct"
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "model_name": "Phi-3.5-mini-instruct",
-        "model_type": "chat-completion",
-        "model_provider_name": "Microsoft"
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-01/GetModelInfo_MinimumSet_Gen.json b/specification/ai/ModelInference/examples/2025-05-01/GetModelInfo_MinimumSet_Gen.json
deleted file mode 100644
index 628e6539a92d..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-01/GetModelInfo_MinimumSet_Gen.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{
-  "title": "Get model information",
-  "operationId": "GetModelInfo",
-  "parameters": {
-    "api-version": "2025-05-01"
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "model_name": "Phi-3.5-mini-instruct",
-        "model_type": "chat-completion",
-        "model_provider_name": "Microsoft"
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-15-preview/GetChatCompletions_AudioModality_Gen.json b/specification/ai/ModelInference/examples/2025-05-15-preview/GetChatCompletions_AudioModality_Gen.json
deleted file mode 100644
index 8ecb38ed11a7..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-15-preview/GetChatCompletions_AudioModality_Gen.json
+++ /dev/null
@@ -1,99 +0,0 @@
-{
-  "title": "Chat completion with audio content",
-  "operationId": "GetChatCompletions",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "extra-parameters": "error",
-    "body": {
-      "modalities": [
-        "text",
-        "audio"
-      ],
-      "messages": [
-        {
-          "role": "system",
-          "content": "You are a helpful assistant"
-        },
-        {
-          "role": "user",
-          "content": [
-            {
-              "type": "input_audio",
-              "input_audio": {
-                "data": "<base64 encoded audio data>",
-                "format": "wav"
-              }
-            }
-          ]
-        },
-        {
-          "role": "assistant",
-          "content": null,
-          "audio": {
-            "id": "abcdef1234"
-          }
-        },
-        {
-          "role": "user",
-          "content": [
-            {
-              "type": "input_audio",
-              "input_audio": {
-                "data": "<base64 encoded audio data>",
-                "format": "wav"
-              }
-            }
-          ]
-        }
-      ],
-      "frequency_penalty": 0,
-      "presence_penalty": 0,
-      "temperature": 0,
-      "top_p": 0,
-      "seed": 21,
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "kgousajxgzyhugvqekuswuqbk",
-        "object": "chat.completion",
-        "created": 1696522361,
-        "model": "my-model-name",
-        "usage": {
-          "completion_tokens": 19,
-          "prompt_tokens": 28,
-          "total_tokens": 16,
-          "completion_tokens_details": {
-            "audio_tokens": 5,
-            "reasoning_tokens": 0,
-            "total_tokens": 5
-          },
-          "prompt_tokens_details": {
-            "audio_tokens": 10,
-            "cached_tokens": 0
-          }
-        },
-        "choices": [
-          {
-            "index": 0,
-            "finish_reason": "stop",
-            "message": {
-              "role": "assistant",
-              "content": null,
-              "tool_calls": null,
-              "audio": {
-                "id": "abcdef1234",
-                "format": "wav",
-                "data": "<base64 encoded audio data>",
-                "expires_at": 1896522361,
-                "transcript": "This is a sample transcript"
-              }
-            }
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-15-preview/GetChatCompletions_MaximumSet_Gen.json b/specification/ai/ModelInference/examples/2025-05-15-preview/GetChatCompletions_MaximumSet_Gen.json
deleted file mode 100644
index 9f291b43f7e9..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-15-preview/GetChatCompletions_MaximumSet_Gen.json
+++ /dev/null
@@ -1,95 +0,0 @@
-{
-  "title": "Chat completion with multiple parameters and chat history",
-  "operationId": "GetChatCompletions",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "extra-parameters": "error",
-    "body": {
-      "modalities": [
-        "text"
-      ],
-      "messages": [
-        {
-          "role": "system",
-          "content": "You are a helpful assistant"
-        },
-        {
-          "role": "user",
-          "content": "Explain Riemann's conjecture"
-        },
-        {
-          "role": "assistant",
-          "content": "The Riemann Conjecture is a deep mathematical conjecture around prime numbers and how they can be predicted. It was first published in Riemann's groundbreaking 1859 paper. The conjecture states that the Riemann zeta function has its zeros only at the negative even integers and complex numbers with real part 1/21. Many consider it to be the most important unsolved problem in pure mathematics. The Riemann hypothesis is a way to predict the probability that numbers in a certain range are prime that was also devised by German mathematician Bernhard Riemann in 18594."
-        },
-        {
-          "role": "user",
-          "content": "Ist it proved?"
-        }
-      ],
-      "frequency_penalty": 0,
-      "stream": true,
-      "presence_penalty": 0,
-      "temperature": 0,
-      "top_p": 0,
-      "max_tokens": 255,
-      "response_format": {
-        "type": "text"
-      },
-      "stop": [
-        "<|endoftext|>"
-      ],
-      "tools": [
-        {
-          "type": "function",
-          "function": {
-            "name": "my-function-name",
-            "description": "A function useful to know if a theroem is proved or not"
-          }
-        }
-      ],
-      "seed": 21,
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "kgousajxgzyhugvqekuswuqbk",
-        "object": "chat.completion",
-        "created": 18,
-        "model": "my-model-name",
-        "usage": {
-          "completion_tokens": 19,
-          "prompt_tokens": 28,
-          "total_tokens": 16,
-          "completion_tokens_details": {
-            "audio_tokens": 5,
-            "reasoning_tokens": 0,
-            "total_tokens": 5
-          }
-        },
-        "choices": [
-          {
-            "index": 7,
-            "finish_reason": "stop",
-            "message": {
-              "role": "assistant",
-              "content": null,
-              "reasoning_content": null,
-              "tool_calls": [
-                {
-                  "id": "yrobmilsrugmbwukmzo",
-                  "type": "function",
-                  "function": {
-                    "name": "my-function-name",
-                    "arguments": "{ \"arg1\": \"value1\", \"arg2\": \"value2\" }"
-                  }
-                }
-              ]
-            }
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-15-preview/GetChatCompletions_MinimumSet_Gen.json b/specification/ai/ModelInference/examples/2025-05-15-preview/GetChatCompletions_MinimumSet_Gen.json
deleted file mode 100644
index 2aac392805a5..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-15-preview/GetChatCompletions_MinimumSet_Gen.json
+++ /dev/null
@@ -1,40 +0,0 @@
-{
-  "title": "Simple chat completion",
-  "operationId": "GetChatCompletions",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "body": {
-      "messages": [
-        {
-          "role": "user",
-          "content": "Explain Riemann's conjecture"
-        }
-      ]
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "kgousajxgzyhugvqekuswuqbk",
-        "object": "chat.completion",
-        "created": 1234567890,
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 205,
-          "completion_tokens": 5,
-          "total_tokens": 210
-        },
-        "choices": [
-          {
-            "index": 0,
-            "finish_reason": "stop",
-            "message": {
-              "role": "assistant",
-              "content": "The Riemann Conjecture is a deep mathematical conjecture around prime numbers and how they can be predicted. It was first published in Riemann's groundbreaking 1859 paper. The conjecture states that the Riemann zeta function has its zeros only at the negative even integers and complex numbers with real part 1/21. Many consider it to be the most important unsolved problem in pure mathematics. The Riemann hypothesis is a way to predict the probability that numbers in a certain range are prime that was also devised by German mathematician Bernhard Riemann in 18594"
-            }
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-15-preview/GetEmbeddings_MaximumSet_Gen.json b/specification/ai/ModelInference/examples/2025-05-15-preview/GetEmbeddings_MaximumSet_Gen.json
deleted file mode 100644
index 13f078da618f..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-15-preview/GetEmbeddings_MaximumSet_Gen.json
+++ /dev/null
@@ -1,50 +0,0 @@
-{
-  "title": "Create text embeddings with dimension, encoding, and input type",
-  "operationId": "GetEmbeddings",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "extra-parameters": "error",
-    "body": {
-      "input": [
-        "This is a very good text"
-      ],
-      "dimensions": 1024,
-      "encoding_format": "float",
-      "input_type": "text",
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-15-preview/GetEmbeddings_MinimumSet_Gen.json b/specification/ai/ModelInference/examples/2025-05-15-preview/GetEmbeddings_MinimumSet_Gen.json
deleted file mode 100644
index ebb6fc2511ee..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-15-preview/GetEmbeddings_MinimumSet_Gen.json
+++ /dev/null
@@ -1,45 +0,0 @@
-{
-  "title": "Create text embeddings",
-  "operationId": "GetEmbeddings",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "body": {
-      "input": [
-        "This is a very good text"
-      ]
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-15-preview/GetImageEmbeddings_MaximumSet_Gen.json b/specification/ai/ModelInference/examples/2025-05-15-preview/GetImageEmbeddings_MaximumSet_Gen.json
deleted file mode 100644
index bd35a7d22f0c..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-15-preview/GetImageEmbeddings_MaximumSet_Gen.json
+++ /dev/null
@@ -1,53 +0,0 @@
-{
-  "title": "maximum set image embeddings",
-  "operationId": "GetImageEmbeddings",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "extra-parameters": "error",
-    "body": {
-      "input": [
-        {
-          "image": "puqkvvlvgcjyzughesnkena",
-          "text": "azrzyjsmnuefqpowpvfmyobeehqsni"
-        }
-      ],
-      "dimensions": 1024,
-      "encoding_format": "float",
-      "input_type": "text",
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-15-preview/GetImageEmbeddings_MinimumSet_Gen.json b/specification/ai/ModelInference/examples/2025-05-15-preview/GetImageEmbeddings_MinimumSet_Gen.json
deleted file mode 100644
index f8677ebc8a31..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-15-preview/GetImageEmbeddings_MinimumSet_Gen.json
+++ /dev/null
@@ -1,47 +0,0 @@
-{
-  "title": "Generate image embeddings",
-  "operationId": "GetImageEmbeddings",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "body": {
-      "input": [
-        {
-          "image": "gvmojtfooxixxzayrditjlyymg"
-        }
-      ]
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-15-preview/GetImageGenerations_MaximumSet_Gen.json b/specification/ai/ModelInference/examples/2025-05-15-preview/GetImageGenerations_MaximumSet_Gen.json
deleted file mode 100644
index 3e04eec83312..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-15-preview/GetImageGenerations_MaximumSet_Gen.json
+++ /dev/null
@@ -1,41 +0,0 @@
-{
-  "title": "Generate an image based on a prompt, a negative prompt, and an image",
-  "operationId": "GetImageGenerations",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "extra-parameters": "error",
-    "body": {
-      "prompt": "An image of a cat",
-      "negative_prompt": "a dog",
-      "image_prompt": {
-        "image": "<base64 encoded image data>",
-        "strength": 0.7
-      },
-      "size": "1024x1024",
-      "output_format": "png",
-      "quality": "standard",
-      "n": 1,
-      "model": "Stable-Image-Ultra",
-      "seed": 42
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "created": 18,
-        "data": [
-          {
-            "image": "<base64 encoded image data>",
-            "seed": 42
-          }
-        ],
-        "object": "image.generation",
-        "model": "Stable-Image-Ultra",
-        "usage": {
-          "image_generations": 1
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-15-preview/GetImageGenerations_MinimumSet_Gen.json b/specification/ai/ModelInference/examples/2025-05-15-preview/GetImageGenerations_MinimumSet_Gen.json
deleted file mode 100644
index 2045052b5223..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-15-preview/GetImageGenerations_MinimumSet_Gen.json
+++ /dev/null
@@ -1,32 +0,0 @@
-{
-  "title": "Generate a simple image based on a prompt",
-  "operationId": "GetImageGenerations",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "body": {
-      "prompt": "An image of a cat",
-      "size": "1024x1024",
-      "model": "Stable-Image-Ultra",
-      "n": 1
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "created": 18,
-        "data": [
-          {
-            "image": "<base64 encoded image data>",
-            "seed": 42
-          }
-        ],
-        "object": "image.generation",
-        "model": "Stable-Image-Ultra",
-        "usage": {
-          "image_generations": 1
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-15-preview/GetModelInfo_MaximumSet_Gen.json b/specification/ai/ModelInference/examples/2025-05-15-preview/GetModelInfo_MaximumSet_Gen.json
deleted file mode 100644
index 534a51a834bd..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-15-preview/GetModelInfo_MaximumSet_Gen.json
+++ /dev/null
@@ -1,17 +0,0 @@
-{
-  "title": "Get model information for a model deployment",
-  "operationId": "GetModelInfo",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "model": "Phi-3.5-mini-instruct"
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "model_name": "Phi-3.5-mini-instruct",
-        "model_type": "chat-completion",
-        "model_provider_name": "Microsoft"
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/examples/2025-05-15-preview/GetModelInfo_MinimumSet_Gen.json b/specification/ai/ModelInference/examples/2025-05-15-preview/GetModelInfo_MinimumSet_Gen.json
deleted file mode 100644
index ad29d906732a..000000000000
--- a/specification/ai/ModelInference/examples/2025-05-15-preview/GetModelInfo_MinimumSet_Gen.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{
-  "title": "Get model information",
-  "operationId": "GetModelInfo",
-  "parameters": {
-    "api-version": "2025-05-15-preview"
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "model_name": "Phi-3.5-mini-instruct",
-        "model_type": "chat-completion",
-        "model_provider_name": "Microsoft"
-      }
-    }
-  }
-}
diff --git a/specification/ai/ModelInference/main.tsp b/specification/ai/ModelInference/main.tsp
index d9bbf7ed83d2..0abc11ecbe3e 100644
--- a/specification/ai/ModelInference/main.tsp
+++ b/specification/ai/ModelInference/main.tsp
@@ -18,7 +18,7 @@ using TypeSpec.Versioning;
 )
 @service(#{ title: "AI Model Inference" })
 @server(
-  "https://{resource}.services.ai.azure.com/api/models",
+  "https://{resource}.services.ai.azure.com/models",
   "AI Model Inference",
   {
     @doc("The Azure AI Services resource name, for example 'my-resource'")
@@ -33,16 +33,4 @@ enum Versions {
   @useDependency(Azure.Core.Versions.v1_0_Preview_2)
   @doc("The 2024-05-01-preview version of the AI.Model service.")
   v2024_05_01_Preview: "2024-05-01-preview",
-
-  @useDependency(Azure.Core.Versions.v1_0_Preview_2)
-  @doc("The 2025-04-01 version of the AI.Model service.")
-  v2025_04_01: "2025-04-01",
-
-  @useDependency(Azure.Core.Versions.v1_0_Preview_2)
-  @doc("The 2025-05-01 version of the AI.Model service.")
-  v2025_05_01: "2025-05-01",
-
-  @useDependency(Azure.Core.Versions.v1_0_Preview_2)
-  @doc("The 2025-05-15-preview version of the AI.Model service.")
-  v2025_05_15_Preview: "2025-05-15-preview",
 }
diff --git a/specification/ai/ModelInference/models/chat_completions.tsp b/specification/ai/ModelInference/models/chat_completions.tsp
index 9e9d6cc06bb2..8c5b81019ff7 100644
--- a/specification/ai/ModelInference/models/chat_completions.tsp
+++ b/specification/ai/ModelInference/models/chat_completions.tsp
@@ -131,15 +131,6 @@ model ChatCompletionsOptions {
     """)
   modalities?: ChatCompletionsModality[];
 
-  @doc("""
-    User security context contains several parameters that describe the AI application itself, and the end user that interacts with the AI application. 
-    These fields assist your security operations teams to investigate and mitigate security incidents by providing a comprehensive approach to protecting your AI applications. 
-    [Learn more](https://aka.ms/TP4AI/Documentation/EndUserContext) about protecting AI applications using Microsoft Defender for Cloud.
-    """)
-  @added(ModelInference.Versions.v2025_05_01)
-  @clientName("UserSecurityContext", "csharp")
-  user_security_context?: UserSecurityContext;
-
   ...Record<unknown>;
 }
 @doc("The modalities that the model is allowed to use for the chat completions response.")
@@ -256,11 +247,6 @@ model CompletionsUsageDetails {
   @visibility(Lifecycle.Read)
   audio_tokens: int32;
 
-  @doc("The number of tokens corresponding to reasoning.")
-  @added(ModelInference.Versions.v2025_04_01)
-  @visibility(Lifecycle.Read)
-  reasoning_tokens: int32;
-
   @doc("The total number of tokens processed for the completions request and response.")
   @visibility(Lifecycle.Read)
   total_tokens: int32;
@@ -506,11 +492,6 @@ model StreamingChatResponseMessageUpdate {
   @visibility(Lifecycle.Read)
   content?: string;
 
-  @doc("The reasoning content the model used for generating the response")
-  @added(ModelInference.Versions.v2025_04_01)
-  @visibility(Lifecycle.Read)
-  reasoning_content?: string;
-
   @encodedName("application/json", "tool_calls")
   @doc("""
     The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
@@ -531,12 +512,6 @@ model ChatResponseMessage {
   @visibility(Lifecycle.Read)
   content: string | null;
 
-  #suppress "@azure-tools/typespec-azure-core/no-nullable" "explicitly nullable in mirrored API"
-  @doc("The reasoning content the model used for generating the response")
-  @added(ModelInference.Versions.v2025_04_01)
-  @visibility(Lifecycle.Read)
-  reasoning_content?: string;
-
   @encodedName("application/json", "tool_calls")
   @doc("""
     The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
@@ -800,38 +775,3 @@ model ChatCompletionsAudio {
   @visibility(Lifecycle.Read)
   transcript: string;
 }
-
-@doc("""
-  User security context contains several parameters that describe the AI application itself, and the end user that interacts with the AI application. 
-  These fields assist your security operations teams to investigate and mitigate security incidents by providing a comprehensive approach to protecting your AI applications. 
-  [Learn more](https://aka.ms/TP4AI/Documentation/EndUserContext) about protecting AI applications using Microsoft Defender for Cloud.
-  """)
-@added(ModelInference.Versions.v2025_05_01)
-model UserSecurityContext {
-  @doc("The name of the application. Sensitive personal information should not be included in this field.")
-  @maxLength(100)
-  application_name?: string;
-
-  @doc("""
-    This identifier is the Microsoft Entra ID (formerly Azure Active Directory) user object ID used to authenticate end-users within the generative AI application. Sensitive personal information should not be included in this field.
-    """)
-  @minLength(36)
-  @maxLength(36)
-  @pattern("^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$")
-  end_user_id?: string;
-
-  @doc("""
-    The Microsoft 365 tenant ID the end user belongs to. It's required when the generative AI application is multi tenant.
-    """)
-  @minLength(36)
-  @maxLength(36)
-  @pattern("^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$")
-  end_user_tenant_id?: string;
-
-  @doc("""
-    Captures the original client's IP address, accepting both IPv4 and IPv6 formats.
-    """)
-  @minLength(2)
-  @maxLength(45)
-  source_ip?: string;
-}
diff --git a/specification/ai/ModelInference/models/image_generations.tsp b/specification/ai/ModelInference/models/image_generations.tsp
deleted file mode 100644
index ba540f164002..000000000000
--- a/specification/ai/ModelInference/models/image_generations.tsp
+++ /dev/null
@@ -1,197 +0,0 @@
-import "@typespec/rest";
-import "@typespec/http";
-import "@typespec/versioning";
-
-using TypeSpec.Rest;
-using TypeSpec.Http;
-using TypeSpec.Versioning;
-
-namespace ModelInference;
-
-@doc("""
-  The configuration information for an image generation request.
-  """)
-@added(ModelInference.Versions.v2025_05_15_Preview)
-model ImageGenerationsOptions {
-  @doc("""
-    The prompt to use for the image geneartion. Read the model documentation to understand
-    which language you should use to prompt the model and get specific results.
-    """)
-  prompt: string;
-
-  @doc("""
-    Optional. The prompt to use for the negative image generation. Read the model documentation
-    to understand to prompt the model and get specific results. If the model doesn't support
-    negative prompts, a 422 error is returned.
-    """)
-  negative_prompt?: string;
-
-  @doc("""
-    Optional. An image to use as guidance for the image generation process.
-    """)
-  image_prompt?: ImageGenerationPrompt;
-
-  #suppress "@azure-tools/typespec-autorest/union-unsupported" "OpenAPI v2 support deferred"
-  @doc("""
-    The size of the image to generate.
-    """)
-  size: ImageGenerationPreset | ImageGenerationSize;
-
-  @doc("""
-    Optional. The format in which to generate the image.
-    Returns a 422 error if the model doesn't support the value or parameter.
-    """)
-  output_format?: ImageGenerationOutputFormat = "png";
-
-  @doc("""
-    Optional. The quality of the image to generate.
-    Returns a 422 error if the model doesn't support the value or parameter.
-    """)
-  quality?: ImageGenerationQuality = "standard";
-
-  @doc("""
-    The number of images to generate. Defaults to 1.
-    """)
-  n: int32 = 1;
-
-  @doc("""
-    ID of the specific AI model to use, if more than one model is available on the endpoint.
-    """)
-  `model`?: string;
-
-  @doc("""
-    If specified, the system will make a best effort to sample deterministically such that repeated requests with the
-    same seed and parameters should return the same result. Determinism is not guaranteed.
-    """)
-  seed?: int64;
-
-  ...Record<unknown>;
-}
-
-@doc("The preset size of the image to generate.")
-@added(ModelInference.Versions.v2025_05_15_Preview)
-union ImageGenerationPreset {
-  string,
-
-  @doc("The size of the image is 672x1566 pixels.")
-  nine_twenty_one: "672x1566",
-
-  @doc("The size of the image is 768x1366 pixels.")
-  nine_sixteen: "768x1366",
-
-  @doc("The size of the image is 836x1254 pixels.")
-  two_three: "836x1254",
-
-  @doc("The size of the image is 916x1145 pixels.")
-  four_five: "916x1145",
-
-  @doc("The size of the image is 1024x1024 pixels.")
-  one_one: "1024x1024",
-
-  @doc("The size of the image is 1145x916 pixels.")
-  five_four: "1145x916",
-
-  @doc("The size of the image is 1254x836 pixels.")
-  three_two: "1254x836",
-
-  @doc("The size of the image is 1366x768 pixels.")
-  sixteen_nine: "1366x768",
-
-  @doc("The size of the image is 1566x672 pixels.")
-  twenty_one_nine: "1566x672",
-}
-
-@doc("The size of the image to generate.")
-@added(ModelInference.Versions.v2025_05_15_Preview)
-model ImageGenerationSize {
-  @doc("The width of the image to generate, in pixels.")
-  width: int32;
-
-  @doc("The height of the image to generate, in pixels.")
-  height: int32;
-}
-
-@doc("The image generation format to use in the output.")
-@added(ModelInference.Versions.v2025_05_15_Preview)
-union ImageGenerationOutputFormat {
-  string,
-
-  @doc("Generates images in PNG format.")
-  png: "png",
-
-  @doc("Generates images in JPEG format.")
-  jpg: "jpg",
-}
-
-@doc("Represents an image to use for guidance during the generation.")
-@added(ModelInference.Versions.v2025_05_15_Preview)
-model ImageGenerationPrompt {
-  @doc("""
-    The input image encoded in base64 string as a data URL.
-    Example: `data:image/{format};base64,{data}`."
-    """)
-  image: string;
-
-  @doc("""
-    The degree at which the generation process uses the image prompt as guidance. 1 indicates
-    a generation process that fully follows the input image. 0 a generation a process that doesn't
-    take into consideration the prompt image.
-    """)
-  strength?: float32 = 1;
-}
-
-@doc("The quality of the image to generate.")
-@added(ModelInference.Versions.v2025_05_15_Preview)
-union ImageGenerationQuality {
-  string,
-
-  @doc("Generates images with the HD quality.")
-  hd: "hd",
-
-  @doc("Generates images with standard quality.")
-  standard: "standard",
-}
-
-@doc("The result of an image generation request.")
-@added(ModelInference.Versions.v2025_05_15_Preview)
-model ImageGenerationsResult {
-  @doc("Unique identifier for the generation result.")
-  id: string;
-
-  #suppress "@microsoft/azure-openapi-validator/IntegerTypeMustHaveFormat" "Format is present but rule only allow int32 or int64."
-  @doc("""
-    The first timestamp associated with generation activity for this completions response,
-    represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
-    """)
-  @encode(DateTimeKnownEncoding.unixTimestamp, int64)
-  created: utcDateTime;
-
-  @doc("The model used for the chat completion.")
-  `model`: string;
-
-  @doc("Embedding values for the prompts submitted in the request.")
-  data: ImageGenerationItem[];
-
-  @doc("Usage counts for tokens input using the embeddings API.")
-  usage: ImageGenerationUsage;
-
-  @doc("The object type of the image generation result. Will always be `list`.")
-  object: "image.generation";
-}
-
-@doc("Representation of a single image generation.")
-@added(ModelInference.Versions.v2025_05_15_Preview)
-model ImageGenerationItem {
-  @doc("The image generated, encoded in base64.")
-  image: string;
-
-  @doc("The seed that can be used to generate the image.")
-  seed?: int64;
-}
-
-@doc("Usage counts for image generation API.")
-@added(ModelInference.Versions.v2025_05_15_Preview)
-model ImageGenerationUsage {
-  @doc("The number of image generations performed.")
-  image_generations: int32;
-}
diff --git a/specification/ai/ModelInference/routes.tsp b/specification/ai/ModelInference/routes.tsp
index 5a11f41c763f..4f85c0dee174 100644
--- a/specification/ai/ModelInference/routes.tsp
+++ b/specification/ai/ModelInference/routes.tsp
@@ -7,7 +7,6 @@ import "./models/common.tsp";
 import "./models/chat_completions.tsp";
 import "./models/embeddings.tsp";
 import "./models/image_embeddings.tsp";
-import "./models/image_generations.tsp";
 
 using TypeSpec.Rest;
 using TypeSpec.Http;
@@ -69,21 +68,6 @@ op getImageEmbeddings is Azure.Core.RpcOperation<
   EmbeddingsResult
 >;
 
-@doc("""
-  Generates an image based on a text or image prompt.
-  The method makes a REST API call to the `/images/generations` route on the given endpoint.
-  """)
-@actionSeparator("/")
-@route("images/generations")
-@added(ModelInference.Versions.v2025_05_15_Preview)
-op getImageGenerations is Azure.Core.RpcOperation<
-  {
-    ...ImageGenerationsOptions;
-    ...AdditionalRequestHeaders;
-  },
-  ImageGenerationsResult
->;
-
 @doc("""
   Returns information about the AI model deployed.
   The method makes a REST API call to the `/info` route on the given endpoint.
@@ -92,12 +76,4 @@ op getImageGenerations is Azure.Core.RpcOperation<
   """)
 @get
 @route("/info")
-op getModelInfo is Azure.Core.RpcOperation<
-  {
-    @doc("The model deployment name you want information from.")
-    @query
-    @added(ModelInference.Versions.v2025_04_01)
-    `model`?: string;
-  },
-  ModelInfo
->;
+op getModelInfo is Azure.Core.RpcOperation<{}, ModelInfo>;
diff --git a/specification/ai/data-plane/ModelInference/openapi/2024-05-01-preview/openapi.yaml b/specification/ai/data-plane/ModelInference/openapi/2024-05-01-preview/openapi.yaml
index a61dfc2bd3e5..ee0402515cc1 100644
--- a/specification/ai/data-plane/ModelInference/openapi/2024-05-01-preview/openapi.yaml
+++ b/specification/ai/data-plane/ModelInference/openapi/2024-05-01-preview/openapi.yaml
@@ -1393,9 +1393,6 @@ components:
       type: string
       enum:
         - 2024-05-01-preview
-        - '2025-04-01'
-        - '2025-05-01'
-        - 2025-05-15-preview
       description: The AI.Model service versions.
   securitySchemes:
     ApiKeyAuth:
@@ -1413,7 +1410,7 @@ components:
           scopes:
             https://cognitiveservices.azure.com/.default: ''
 servers:
-  - url: https://{resource}.services.ai.azure.com/api/models
+  - url: https://{resource}.services.ai.azure.com/models
     description: AI Model Inference
     variables:
       resource:
diff --git a/specification/ai/data-plane/ModelInference/openapi/2025-04-01/openapi.yaml b/specification/ai/data-plane/ModelInference/openapi/2025-04-01/openapi.yaml
deleted file mode 100644
index 83b356b0ee54..000000000000
--- a/specification/ai/data-plane/ModelInference/openapi/2025-04-01/openapi.yaml
+++ /dev/null
@@ -1,1442 +0,0 @@
-openapi: 3.0.0
-info:
-  title: AI Model Inference
-  version: '2025-04-01'
-tags: []
-paths:
-  /chat/completions:
-    post:
-      operationId: getChatCompletions
-      description: |-
-        Gets chat completions for the provided chat messages.
-        Completions support a wide variety of tasks and generate text that continues from or "completes"
-        provided prompt data. The method makes a REST API call to the `/chat/completions` route
-        on the given endpoint.
-      parameters:
-        - $ref: '#/components/parameters/Azure.Core.Foundations.ApiVersionParameter'
-        - name: extra-parameters
-          in: header
-          required: false
-          description: |-
-            Controls what happens if extra parameters, undefined by the REST API,
-            are passed in the JSON request payload.
-            This sets the HTTP request header `extra-parameters`.
-          schema:
-            $ref: '#/components/schemas/ExtraParameters'
-      responses:
-        '200':
-          description: The request has succeeded.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ChatCompletions'
-        default:
-          description: An unexpected error response.
-          headers:
-            x-ms-error-code:
-              required: false
-              description: String error code indicating what went wrong.
-              schema:
-                type: string
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Azure.Core.Foundations.ErrorResponse'
-      requestBody:
-        required: true
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/ChatCompletionsOptions'
-        description: The parameters of the chat completions request.
-  /embeddings:
-    post:
-      operationId: getEmbeddings
-      description: |-
-        Return the embedding vectors for given text prompts.
-        The method makes a REST API call to the `/embeddings` route on the given endpoint.
-      parameters:
-        - $ref: '#/components/parameters/Azure.Core.Foundations.ApiVersionParameter'
-        - name: extra-parameters
-          in: header
-          required: false
-          description: |-
-            Controls what happens if extra parameters, undefined by the REST API,
-            are passed in the JSON request payload.
-            This sets the HTTP request header `extra-parameters`.
-          schema:
-            $ref: '#/components/schemas/ExtraParameters'
-      responses:
-        '200':
-          description: The request has succeeded.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/EmbeddingsResult'
-        default:
-          description: An unexpected error response.
-          headers:
-            x-ms-error-code:
-              required: false
-              description: String error code indicating what went wrong.
-              schema:
-                type: string
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Azure.Core.Foundations.ErrorResponse'
-      requestBody:
-        required: true
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/EmbeddingsOptions'
-        description: The parameters of the embeddings request.
-  /images/embeddings:
-    post:
-      operationId: getImageEmbeddings
-      description: |-
-        Return the embedding vectors for given images.
-        The method makes a REST API call to the `/images/embeddings` route on the given endpoint.
-      parameters:
-        - $ref: '#/components/parameters/Azure.Core.Foundations.ApiVersionParameter'
-        - name: extra-parameters
-          in: header
-          required: false
-          description: |-
-            Controls what happens if extra parameters, undefined by the REST API,
-            are passed in the JSON request payload.
-            This sets the HTTP request header `extra-parameters`.
-          schema:
-            $ref: '#/components/schemas/ExtraParameters'
-      responses:
-        '200':
-          description: The request has succeeded.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/EmbeddingsResult'
-        default:
-          description: An unexpected error response.
-          headers:
-            x-ms-error-code:
-              required: false
-              description: String error code indicating what went wrong.
-              schema:
-                type: string
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Azure.Core.Foundations.ErrorResponse'
-      requestBody:
-        required: true
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/ImageEmbeddingsOptions'
-        description: The parameters of the image embeddings request.
-  /info:
-    get:
-      operationId: getModelInfo
-      description: |-
-        Returns information about the AI model deployed.
-        The method makes a REST API call to the `/info` route on the given endpoint.
-        This method will only work when using Serverless API, Managed Compute, or Model .
-        inference endpoint. Azure OpenAI endpoints don't support i.
-      parameters:
-        - $ref: '#/components/parameters/Azure.Core.Foundations.ApiVersionParameter'
-        - name: model
-          in: query
-          required: false
-          description: The model deployment name you want information from.
-          schema:
-            type: string
-          explode: false
-      responses:
-        '200':
-          description: The request has succeeded.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ModelInfo'
-        default:
-          description: An unexpected error response.
-          headers:
-            x-ms-error-code:
-              required: false
-              description: String error code indicating what went wrong.
-              schema:
-                type: string
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Azure.Core.Foundations.ErrorResponse'
-security:
-  - ApiKeyAuth: []
-  - BearerAuth: []
-  - OAuth2Auth:
-      - https://cognitiveservices.azure.com/.default
-components:
-  parameters:
-    Azure.Core.Foundations.ApiVersionParameter:
-      name: api-version
-      in: query
-      required: true
-      description: The API version to use for this operation.
-      schema:
-        type: string
-        minLength: 1
-      explode: false
-  schemas:
-    AudioContentFormat:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - wav
-            - mp3
-      description: A representation of the possible audio formats for audio.
-    Azure.Core.Foundations.Error:
-      type: object
-      required:
-        - code
-        - message
-      properties:
-        code:
-          type: string
-          description: One of a server-defined set of error codes.
-        message:
-          type: string
-          description: A human-readable representation of the error.
-        target:
-          type: string
-          description: The target of the error.
-        details:
-          type: array
-          items:
-            $ref: '#/components/schemas/Azure.Core.Foundations.Error'
-          description: An array of details about specific errors that led to this reported error.
-        innererror:
-          allOf:
-            - $ref: '#/components/schemas/Azure.Core.Foundations.InnerError'
-          description: An object containing more specific information than the current object about the error.
-      description: The error object.
-    Azure.Core.Foundations.ErrorResponse:
-      type: object
-      required:
-        - error
-      properties:
-        error:
-          allOf:
-            - $ref: '#/components/schemas/Azure.Core.Foundations.Error'
-          description: The error object.
-      description: A response containing error details.
-    Azure.Core.Foundations.InnerError:
-      type: object
-      properties:
-        code:
-          type: string
-          description: One of a server-defined set of error codes.
-        innererror:
-          allOf:
-            - $ref: '#/components/schemas/Azure.Core.Foundations.InnerError'
-          description: Inner error.
-      description: An object containing more specific information about the error. As per Microsoft One API guidelines - https://github.com/microsoft/api-guidelines/blob/vNext/azure/Guidelines.md#handling-errors.
-    ChatChoice:
-      type: object
-      required:
-        - index
-        - finish_reason
-        - message
-      properties:
-        index:
-          type: integer
-          format: int32
-          description: The ordered index associated with this chat completions choice.
-        finish_reason:
-          allOf:
-            - $ref: '#/components/schemas/CompletionsFinishReason'
-          nullable: true
-          description: The reason that this chat completions choice completed its generated.
-          readOnly: true
-        message:
-          allOf:
-            - $ref: '#/components/schemas/ChatResponseMessage'
-          description: The chat message for a given chat completions prompt.
-          readOnly: true
-      description: |-
-        The representation of a single prompt completion as part of an overall chat completions request.
-        Generally, `n` choices are generated per provided prompt with a default value of 1.
-        Token limits and other settings may limit the number of choices generated.
-    ChatCompletions:
-      type: object
-      required:
-        - id
-        - object
-        - created
-        - model
-        - choices
-        - usage
-      properties:
-        id:
-          type: string
-          description: A unique identifier associated with this chat completions response.
-        object:
-          type: string
-          enum:
-            - chat.completion
-          description: The response object type, which is always `chat.completion`.
-        created:
-          type: integer
-          format: unixtime
-          description: |-
-            The first timestamp associated with generation activity for this completions response,
-            represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
-          readOnly: true
-        model:
-          type: string
-          description: The model used for the chat completion.
-          readOnly: true
-        choices:
-          type: array
-          items:
-            $ref: '#/components/schemas/ChatChoice'
-          minItems: 1
-          description: |-
-            The collection of completions choices associated with this completions response.
-            Generally, `n` choices are generated per provided prompt with a default value of 1.
-            Token limits and other settings may limit the number of choices generated.
-          readOnly: true
-        usage:
-          allOf:
-            - $ref: '#/components/schemas/CompletionsUsage'
-          description: '  Usage information for tokens processed and generated as part of this completions operation.'
-          readOnly: true
-      description: |-
-        Representation of the response data from a chat completions request.
-        Completions support a wide variety of tasks and generate text that continues from or "completes"
-        provided prompt data.
-    ChatCompletionsAudio:
-      type: object
-      required:
-        - id
-        - expires_at
-        - data
-        - transcript
-      properties:
-        id:
-          type: string
-          description: |2-
-              Unique identifier for the audio response. This value can be used in chat history messages instead of passing 
-              the full audio object.
-          readOnly: true
-        expires_at:
-          type: integer
-          format: unixtime
-          description: |-
-            The Unix timestamp (in seconds) at which the audio piece expires and can't be any longer referenced by its ID in 
-            multi-turn conversations.
-          readOnly: true
-        data:
-          type: string
-          description: Base64 encoded audio data
-          readOnly: true
-        format:
-          allOf:
-            - $ref: '#/components/schemas/AudioContentFormat'
-          description: |-
-            The format of the audio content. If format is not provided, it will match the format used in the
-            input audio request.
-          readOnly: true
-        transcript:
-          type: string
-          description: The transcript of the audio file.
-          readOnly: true
-      description: A representation of the audio generated by the model.
-    ChatCompletionsModality:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - text
-            - audio
-      description: The modalities that the model is allowed to use for the chat completions response.
-    ChatCompletionsNamedToolChoice:
-      type: object
-      required:
-        - type
-        - function
-      properties:
-        type:
-          type: string
-          enum:
-            - function
-          description: The type of the tool. Currently, only `function` is supported.
-        function:
-          allOf:
-            - $ref: '#/components/schemas/ChatCompletionsNamedToolChoiceFunction'
-          description: The function that should be called.
-      description: A tool selection of a specific, named function tool that will limit chat completions to using the named function.
-    ChatCompletionsNamedToolChoiceFunction:
-      type: object
-      required:
-        - name
-      properties:
-        name:
-          type: string
-          description: The name of the function that should be called.
-      description: A tool selection of a specific, named function tool that will limit chat completions to using the named function.
-    ChatCompletionsOptions:
-      type: object
-      required:
-        - messages
-      properties:
-        messages:
-          type: array
-          items:
-            $ref: '#/components/schemas/ChatRequestMessage'
-          minItems: 1
-          description: |-
-            The collection of context messages associated with this chat completions request.
-            Typical usage begins with a chat message for the System role that provides instructions for
-            the behavior of the assistant, followed by alternating messages between the User and
-            Assistant roles.
-        frequency_penalty:
-          type: number
-          format: float
-          minimum: -2
-          maximum: 2
-          description: |-
-            A value that influences the probability of generated tokens appearing based on their cumulative
-            frequency in generated text.
-            Positive values will make tokens less likely to appear as their frequency increases and
-            decrease the likelihood of the model repeating the same statements verbatim.
-            Supported range is [-2, 2].
-          default: 0
-        stream:
-          type: boolean
-          description: A value indicating whether chat completions should be streamed for this request.
-        presence_penalty:
-          type: number
-          format: float
-          minimum: -2
-          maximum: 2
-          description: |-
-            A value that influences the probability of generated tokens appearing based on their existing
-            presence in generated text.
-            Positive values will make tokens less likely to appear when they already exist and increase the
-            model's likelihood to output new topics.
-            Supported range is [-2, 2].
-          default: 0
-        temperature:
-          type: number
-          format: float
-          minimum: 0
-          maximum: 1
-          description: |-
-            The sampling temperature to use that controls the apparent creativity of generated completions.
-            Higher values will make output more random while lower values will make results more focused
-            and deterministic.
-            It is not recommended to modify temperature and top_p for the same completions request as the
-            interaction of these two settings is difficult to predict.
-            Supported range is [0, 1].
-          default: 0.7
-        top_p:
-          type: number
-          format: float
-          minimum: 0
-          maximum: 1
-          description: |-
-            An alternative to sampling with temperature called nucleus sampling. This value causes the
-            model to consider the results of tokens with the provided probability mass. As an example, a
-            value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
-            considered.
-            It is not recommended to modify temperature and top_p for the same completions request as the
-            interaction of these two settings is difficult to predict.
-            Supported range is [0, 1].
-          default: 1
-        max_tokens:
-          type: integer
-          format: int32
-          minimum: 0
-          description: The maximum number of tokens to generate.
-        response_format:
-          allOf:
-            - $ref: '#/components/schemas/ChatCompletionsResponseFormat'
-          description: |-
-            An object specifying the format that the model must output.
-
-            Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.
-
-            Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON.
-
-            **Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length.
-        stop:
-          type: array
-          items:
-            type: string
-          minItems: 1
-          description: A collection of textual sequences that will end completions generation.
-        tools:
-          type: array
-          items:
-            $ref: '#/components/schemas/ChatCompletionsToolDefinition'
-          minItems: 1
-          description: |-
-            A list of tools the model may request to call. Currently, only functions are supported as a tool. The model
-            may response with a function call request and provide the input arguments in JSON format for that function.
-        tool_choice:
-          anyOf:
-            - $ref: '#/components/schemas/ChatCompletionsToolChoicePreset'
-            - $ref: '#/components/schemas/ChatCompletionsNamedToolChoice'
-          description: If specified, the model will configure which of the provided tools it can use for the chat completions response.
-        seed:
-          type: integer
-          format: int64
-          description: |-
-            If specified, the system will make a best effort to sample deterministically such that repeated requests with the
-            same seed and parameters should return the same result. Determinism is not guaranteed.
-        model:
-          type: string
-          description: ID of the specific AI model to use, if more than one model is available on the endpoint.
-        modalities:
-          type: array
-          items:
-            $ref: '#/components/schemas/ChatCompletionsModality'
-          description: |-
-            The modalities that the model is allowed to use for the chat completions response. The default modality
-            is `text`. Indicating an unsupported modality combination results in an 422 error.
-      additionalProperties: {}
-      description: |-
-        The configuration information for a chat completions request.
-        Completions support a wide variety of tasks and generate text that continues from or "completes"
-        provided prompt data.
-    ChatCompletionsResponseFormat:
-      type: object
-      required:
-        - type
-      properties:
-        type:
-          type: string
-          description: The response format type to use for chat completions.
-      discriminator:
-        propertyName: type
-        mapping:
-          text: '#/components/schemas/ChatCompletionsResponseFormatText'
-          json_object: '#/components/schemas/ChatCompletionsResponseFormatJsonObject'
-          json_schema: '#/components/schemas/ChatCompletionsResponseFormatJsonSchema'
-      description: |-
-        Represents the format that the model must output. Use this to enable JSON mode instead of the default text mode.
-        Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
-        via a system or user message.
-    ChatCompletionsResponseFormatJsonObject:
-      type: object
-      required:
-        - type
-      properties:
-        type:
-          type: string
-          enum:
-            - json_object
-          description: "Response format type: always 'json_object' for this object."
-      allOf:
-        - $ref: '#/components/schemas/ChatCompletionsResponseFormat'
-      description: |-
-        A response format for Chat Completions that restricts responses to emitting valid JSON objects.
-        Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
-        via a system or user message.
-    ChatCompletionsResponseFormatJsonSchema:
-      type: object
-      required:
-        - type
-        - json_schema
-      properties:
-        type:
-          type: string
-          enum:
-            - json_schema
-          description: 'The type of response format being defined: `json_schema`'
-        json_schema:
-          allOf:
-            - $ref: '#/components/schemas/ChatCompletionsResponseFormatJsonSchemaDefinition'
-          description: The definition of the required JSON schema in the response, and associated metadata.
-      allOf:
-        - $ref: '#/components/schemas/ChatCompletionsResponseFormat'
-      description: |-
-        A response format for Chat Completions that restricts responses to emitting valid JSON objects, with a
-        JSON schema specified by the caller.
-    ChatCompletionsResponseFormatJsonSchemaDefinition:
-      type: object
-      required:
-        - name
-        - schema
-      properties:
-        name:
-          type: string
-          description: The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
-        schema:
-          type: object
-          additionalProperties: {}
-          description: The definition of the JSON schema
-        description:
-          type: string
-          description: A description of the response format, used by the AI model to determine how to generate responses in this format.
-        strict:
-          type: boolean
-          description: |-
-            Whether to enable strict schema adherence when generating the output.
-            If set to true, the model will always follow the exact schema defined in the `schema` field. Only a subset of
-            JSON Schema is supported when `strict` is `true`.
-          default: false
-      description: The definition of the required JSON schema in the response, and associated metadata.
-    ChatCompletionsResponseFormatText:
-      type: object
-      required:
-        - type
-      properties:
-        type:
-          type: string
-          enum:
-            - text
-          description: "Response format type: always 'text' for this object."
-      allOf:
-        - $ref: '#/components/schemas/ChatCompletionsResponseFormat'
-      description: A response format for Chat Completions that emits text responses. This is the default response format.
-    ChatCompletionsToolCall:
-      type: object
-      required:
-        - id
-        - type
-        - function
-      properties:
-        id:
-          type: string
-          description: The ID of the tool call.
-        type:
-          type: string
-          enum:
-            - function
-          description: The type of tool call. Currently, only `function` is supported.
-        function:
-          allOf:
-            - $ref: '#/components/schemas/FunctionCall'
-          description: The details of the function call requested by the AI model.
-      description: A function tool call requested by the AI model.
-    ChatCompletionsToolChoicePreset:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - auto
-            - none
-            - required
-      description: Represents a generic policy for how a chat completions tool may be selected.
-    ChatCompletionsToolDefinition:
-      type: object
-      required:
-        - type
-        - function
-      properties:
-        type:
-          type: string
-          enum:
-            - function
-          description: The type of the tool. Currently, only `function` is supported.
-        function:
-          allOf:
-            - $ref: '#/components/schemas/FunctionDefinition'
-          description: The function definition details for the function tool.
-      description: The definition of a chat completions tool that can call a function.
-    ChatMessageAudioContentItem:
-      type: object
-      required:
-        - type
-        - audio_url
-      properties:
-        type:
-          type: string
-          enum:
-            - audio_url
-          description: "The discriminated object type: always 'image_url' for this type."
-        audio_url:
-          allOf:
-            - $ref: '#/components/schemas/ChatMessageAudioUrl'
-          description: An internet location, which must be accessible to the model, from which the audio may be retrieved.
-      allOf:
-        - $ref: '#/components/schemas/ChatMessageContentItem'
-      description: A structured chat content item containing an audio reference.
-    ChatMessageAudioUrl:
-      type: object
-      required:
-        - url
-      properties:
-        url:
-          type: string
-          description: The URL of the audio.
-      description: An internet location from which the model may retrieve an audio.
-    ChatMessageContentItem:
-      type: object
-      required:
-        - type
-      properties:
-        type:
-          type: string
-          description: The discriminated object type.
-      discriminator:
-        propertyName: type
-        mapping:
-          text: '#/components/schemas/ChatMessageTextContentItem'
-          image_url: '#/components/schemas/ChatMessageImageContentItem'
-          audio_url: '#/components/schemas/ChatMessageAudioContentItem'
-          input_audio: '#/components/schemas/ChatMessageInputAudioContentItem'
-      description: An abstract representation of a structured content item within a chat message.
-    ChatMessageImageContentItem:
-      type: object
-      required:
-        - type
-        - image_url
-      properties:
-        type:
-          type: string
-          enum:
-            - image_url
-          description: "The discriminated object type: always 'image_url' for this type."
-        image_url:
-          allOf:
-            - $ref: '#/components/schemas/ChatMessageImageUrl'
-          description: An internet location, which must be accessible to the model,from which the image may be retrieved.
-      allOf:
-        - $ref: '#/components/schemas/ChatMessageContentItem'
-      description: A structured chat content item containing an image reference.
-    ChatMessageImageDetailLevel:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - auto
-            - low
-            - high
-      description: A representation of the possible image detail levels for image-based chat completions message content.
-    ChatMessageImageUrl:
-      type: object
-      required:
-        - url
-      properties:
-        url:
-          type: string
-          description: The URL of the image.
-        detail:
-          allOf:
-            - $ref: '#/components/schemas/ChatMessageImageDetailLevel'
-          description: |-
-            The evaluation quality setting to use, which controls relative prioritization of speed, token consumption, and
-            accuracy.
-      description: An internet location from which the model may retrieve an image.
-    ChatMessageInputAudio:
-      type: object
-      required:
-        - data
-        - format
-      properties:
-        data:
-          type: string
-          description: Base64 encoded audio data
-        format:
-          allOf:
-            - $ref: '#/components/schemas/AudioContentFormat'
-          description: The audio format of the audio content.
-      description: The details of an audio chat message content part.
-    ChatMessageInputAudioContentItem:
-      type: object
-      required:
-        - type
-        - format
-      properties:
-        type:
-          type: string
-          enum:
-            - input_audio
-          description: "The discriminated object type: always 'input_audio' for this type."
-        format:
-          allOf:
-            - $ref: '#/components/schemas/AudioContentFormat'
-          description: The audio format of the audio reference.
-      allOf:
-        - $ref: '#/components/schemas/ChatMessageContentItem'
-      description: A structured chat content item containing an audio content.
-    ChatMessageTextContentItem:
-      type: object
-      required:
-        - type
-        - text
-      properties:
-        type:
-          type: string
-          enum:
-            - text
-          description: "The discriminated object type: always 'text' for this type."
-        text:
-          type: string
-          description: The content of the message.
-      allOf:
-        - $ref: '#/components/schemas/ChatMessageContentItem'
-      description: A structured chat content item containing plain text.
-    ChatRequestAssistantMessage:
-      type: object
-      required:
-        - role
-      properties:
-        role:
-          type: string
-          enum:
-            - assistant
-          description: The chat role associated with this message, which is always 'assistant' for assistant messages.
-        content:
-          type: string
-          description: The content of the message.
-        tool_calls:
-          type: array
-          items:
-            $ref: '#/components/schemas/ChatCompletionsToolCall'
-          description: |-
-            The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
-            completions request to resolve as configured.
-        audio:
-          allOf:
-            - $ref: '#/components/schemas/ChatRequestAudioReference'
-          description: '  The audio generated by a previous response in a multi-turn conversation.'
-      allOf:
-        - $ref: '#/components/schemas/ChatRequestMessage'
-      description: A request chat message representing response or action from the assistant.
-    ChatRequestAudioReference:
-      type: object
-      required:
-        - id
-      properties:
-        id:
-          type: string
-          description: 'Unique identifier for the audio response. This value corresponds to the id of a previous audio completion.'
-      description: A reference to an audio response generated by the model.
-    ChatRequestMessage:
-      type: object
-      required:
-        - role
-      properties:
-        role:
-          allOf:
-            - $ref: '#/components/schemas/ChatRole'
-          description: The chat role associated with this message.
-      discriminator:
-        propertyName: role
-        mapping:
-          system: '#/components/schemas/ChatRequestSystemMessage'
-          user: '#/components/schemas/ChatRequestUserMessage'
-          assistant: '#/components/schemas/ChatRequestAssistantMessage'
-          tool: '#/components/schemas/ChatRequestToolMessage'
-      description: An abstract representation of a chat message as provided in a request.
-    ChatRequestSystemMessage:
-      type: object
-      required:
-        - role
-        - content
-      properties:
-        role:
-          type: string
-          enum:
-            - system
-          description: The chat role associated with this message, which is always 'system' for system messages.
-        content:
-          type: string
-          description: The contents of the system message.
-      allOf:
-        - $ref: '#/components/schemas/ChatRequestMessage'
-      description: |-
-        A request chat message containing system instructions that influence how the model will generate a chat completions
-        response.
-    ChatRequestToolMessage:
-      type: object
-      required:
-        - role
-        - tool_call_id
-      properties:
-        role:
-          type: string
-          enum:
-            - tool
-          description: The chat role associated with this message, which is always 'tool' for tool messages.
-        content:
-          type: string
-          description: The content of the message.
-        tool_call_id:
-          type: string
-          description: The ID of the tool call resolved by the provided content.
-      allOf:
-        - $ref: '#/components/schemas/ChatRequestMessage'
-      description: A request chat message representing requested output from a configured tool.
-    ChatRequestUserMessage:
-      type: object
-      required:
-        - role
-        - content
-      properties:
-        role:
-          type: string
-          enum:
-            - user
-          description: The chat role associated with this message, which is always 'user' for user messages.
-        content:
-          anyOf:
-            - type: string
-            - type: array
-              items:
-                $ref: '#/components/schemas/ChatMessageContentItem'
-          description: The contents of the user message, with available input types varying by selected model.
-      allOf:
-        - $ref: '#/components/schemas/ChatRequestMessage'
-      description: A request chat message representing user input to the assistant.
-    ChatResponseMessage:
-      type: object
-      required:
-        - role
-        - content
-      properties:
-        role:
-          allOf:
-            - $ref: '#/components/schemas/ChatRole'
-          description: The chat role associated with the message.
-          readOnly: true
-        content:
-          type: string
-          nullable: true
-          description: The content of the message.
-          readOnly: true
-        reasoning_content:
-          type: string
-          description: The reasoning content the model used for generating the response
-          readOnly: true
-        tool_calls:
-          type: array
-          items:
-            $ref: '#/components/schemas/ChatCompletionsToolCall'
-          description: |-
-            The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
-            completions request to resolve as configured.
-          readOnly: true
-        audio:
-          allOf:
-            - $ref: '#/components/schemas/ChatCompletionsAudio'
-          description: 'The audio generated by the model as a response to the messages if the model is configured to generate audio.'
-          readOnly: true
-      description: A representation of a chat message as received in a response.
-    ChatRole:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - system
-            - developer
-            - user
-            - assistant
-            - tool
-      description: A description of the intended purpose of a message within a chat completions interaction.
-    CompletionsFinishReason:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - stop
-            - length
-            - content_filter
-            - tool_calls
-      description: Representation of the manner in which a completions response concluded.
-    CompletionsUsage:
-      type: object
-      required:
-        - completion_tokens
-        - prompt_tokens
-        - total_tokens
-      properties:
-        completion_tokens:
-          type: integer
-          format: int32
-          description: The number of tokens generated across all completions emissions.
-          readOnly: true
-        prompt_tokens:
-          type: integer
-          format: int32
-          description: The number of tokens in the provided prompts for the completions request.
-          readOnly: true
-        total_tokens:
-          type: integer
-          format: int32
-          description: The total number of tokens processed for the completions request and response.
-          readOnly: true
-        completion_tokens_details:
-          allOf:
-            - $ref: '#/components/schemas/CompletionsUsageDetails'
-          description: Breakdown of tokens used in a completion.
-          readOnly: true
-        prompt_tokens_details:
-          allOf:
-            - $ref: '#/components/schemas/PromptUsageDetails'
-          description: Breakdown of tokens used in the prompt/chat history.
-          readOnly: true
-      description: |-
-        Representation of the token counts processed for a completions request.
-        Counts consider all tokens across prompts, choices, choice alternates, best_of generations, and
-        other consumers.
-    CompletionsUsageDetails:
-      type: object
-      required:
-        - audio_tokens
-        - reasoning_tokens
-        - total_tokens
-      properties:
-        audio_tokens:
-          type: integer
-          format: int32
-          description: The number of tokens corresponding to audio input.
-          readOnly: true
-        reasoning_tokens:
-          type: integer
-          format: int32
-          description: The number of tokens corresponding to reasoning.
-          readOnly: true
-        total_tokens:
-          type: integer
-          format: int32
-          description: The total number of tokens processed for the completions request and response.
-          readOnly: true
-      description: A breakdown of tokens used in a completion.
-    EmbeddingEncodingFormat:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - base64
-            - binary
-            - float
-            - int8
-            - ubinary
-            - uint8
-      description: |-
-        Specifies the types of embeddings to generate. Compressed embeddings types like `uint8`, `int8`, `ubinary` and 
-        `binary`, may reduce storage costs without sacrificing the integrity of the data. Returns a 422 error if the
-        model doesn't support the value or parameter. Read the model's documentation to know the values supported by
-        your model.
-    EmbeddingInputType:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - text
-            - query
-            - document
-      description: Represents the input types used for embedding search.
-    EmbeddingItem:
-      type: object
-      required:
-        - embedding
-        - index
-        - object
-      properties:
-        embedding:
-          type: array
-          items:
-            type: number
-            format: float
-          description: |-
-            List of embedding values for the input prompt. These represent a measurement of the
-            vector-based relatedness of the provided input. Or a base64 encoded string of the embedding vector.
-          readOnly: true
-        index:
-          type: integer
-          format: int32
-          description: Index of the prompt to which the EmbeddingItem corresponds.
-          readOnly: true
-        object:
-          type: string
-          enum:
-            - embedding
-          description: The object type of this embeddings item. Will always be `embedding`.
-      description: Representation of a single embeddings relatedness comparison.
-    EmbeddingsOptions:
-      type: object
-      required:
-        - input
-      properties:
-        input:
-          type: array
-          items:
-            type: string
-          description: |-
-            Input text to embed, encoded as a string or array of tokens.
-            To embed multiple inputs in a single request, pass an array
-            of strings or array of token arrays.
-        dimensions:
-          type: integer
-          format: int32
-          description: |-
-            Optional. The number of dimensions the resulting output embeddings should have.
-            Passing null causes the model to use its default value.
-            Returns a 422 error if the model doesn't support the value or parameter.
-        encoding_format:
-          allOf:
-            - $ref: '#/components/schemas/EmbeddingEncodingFormat'
-          description: Optional. The desired format for the returned embeddings.
-        input_type:
-          allOf:
-            - $ref: '#/components/schemas/EmbeddingInputType'
-          description: |-
-            Optional. The type of the input.
-            Returns a 422 error if the model doesn't support the value or parameter.
-        model:
-          type: string
-          description: ID of the specific AI model to use, if more than one model is available on the endpoint.
-      additionalProperties: {}
-      description: The configuration information for an embeddings request.
-    EmbeddingsResult:
-      type: object
-      required:
-        - id
-        - data
-        - usage
-        - object
-        - model
-      properties:
-        id:
-          type: string
-          description: Unique identifier for the embeddings result.
-          readOnly: true
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/EmbeddingItem'
-          description: Embedding values for the prompts submitted in the request.
-          readOnly: true
-        usage:
-          allOf:
-            - $ref: '#/components/schemas/EmbeddingsUsage'
-          description: Usage counts for tokens input using the embeddings API.
-          readOnly: true
-        object:
-          type: string
-          enum:
-            - list
-          description: The object type of the embeddings result. Will always be `list`.
-        model:
-          type: string
-          description: The model ID used to generate this result.
-          readOnly: true
-      description: |-
-        Representation of the response data from an embeddings request.
-        Embeddings measure the relatedness of text strings and are commonly used for search, clustering,
-        recommendations, and other similar scenarios.
-    EmbeddingsUsage:
-      type: object
-      required:
-        - prompt_tokens
-        - total_tokens
-      properties:
-        prompt_tokens:
-          type: integer
-          format: int32
-          description: Number of tokens in the request.
-          readOnly: true
-        total_tokens:
-          type: integer
-          format: int32
-          description: |-
-            Total number of tokens transacted in this request/response. Should equal the
-            number of tokens in the request.
-          readOnly: true
-      description: Measurement of the amount of tokens used in this request and response.
-    ExtraParameters:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - error
-            - drop
-            - pass-through
-      description: Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload.
-    FunctionCall:
-      type: object
-      required:
-        - name
-        - arguments
-      properties:
-        name:
-          type: string
-          description: The name of the function to call.
-          readOnly: true
-        arguments:
-          type: string
-          description: |-
-            The arguments to call the function with, as generated by the model in JSON format.
-            Note that the model does not always generate valid JSON, and may hallucinate parameters
-            not defined by your function schema. Validate the arguments in your code before calling
-            your function.
-          readOnly: true
-      description: The name and arguments of a function that should be called, as generated by the model.
-    FunctionDefinition:
-      type: object
-      required:
-        - name
-      properties:
-        name:
-          type: string
-          description: The name of the function to be called.
-        description:
-          type: string
-          description: |-
-            A description of what the function does. The model will use this description when selecting the function and
-            interpreting its parameters.
-        parameters:
-          type: object
-          additionalProperties: {}
-          description: The parameters the function accepts, described as a JSON Schema object.
-      description: The definition of a caller-specified function that chat completions may invoke in response to matching user input.
-    ImageEmbeddingInput:
-      type: object
-      required:
-        - image
-      properties:
-        image:
-          type: string
-          description: 'The input image encoded in base64 string as a data URL. Example: `data:image/{format};base64,{data}`.'
-        text:
-          type: string
-          description: |-
-            Optional. The text input to feed into the model (like DINO, CLIP).
-            Returns a 422 error if the model doesn't support the value or parameter.
-      description: Represents an image with optional text.
-    ImageEmbeddingsOptions:
-      type: object
-      required:
-        - input
-      properties:
-        input:
-          type: array
-          items:
-            $ref: '#/components/schemas/ImageEmbeddingInput'
-          description: |-
-            Input image to embed. To embed multiple inputs in a single request, pass an array.
-            The input must not exceed the max input tokens for the model.
-        dimensions:
-          type: integer
-          format: int32
-          description: |-
-            Optional. The number of dimensions the resulting output embeddings should have.
-            Passing null causes the model to use its default value.
-            Returns a 422 error if the model doesn't support the value or parameter.
-        encoding_format:
-          allOf:
-            - $ref: '#/components/schemas/EmbeddingEncodingFormat'
-          description: |-
-            Optional. The desired format for the returned embeddings.
-            Passing null causes the model to use its default value.
-            Returns a 422 error if the model doesn't support the value or parameter.
-        input_type:
-          allOf:
-            - $ref: '#/components/schemas/EmbeddingInputType'
-          description: |-
-            Optional. The type of the input.
-            Returns a 422 error if the model doesn't support the value or parameter.
-        model:
-          type: string
-          description: ID of the specific AI model to use, if more than one model is available on the endpoint.
-      additionalProperties: {}
-      description: The configuration information for an image embeddings request.
-    ModelInfo:
-      type: object
-      required:
-        - model_name
-        - model_type
-        - model_provider_name
-      properties:
-        model_name:
-          type: string
-          description: 'The name of the AI model. For example: `Phi21`'
-          readOnly: true
-        model_type:
-          allOf:
-            - $ref: '#/components/schemas/ModelType'
-          description: The type of the AI model. A Unique identifier for the profile.
-          readOnly: true
-        model_provider_name:
-          type: string
-          description: 'The model provider name. For example: `Microsoft`'
-          readOnly: true
-      description: Represents some basic information about the AI model.
-    ModelType:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - embeddings
-            - chat-completion
-      description: The type of AI model
-    PromptUsageDetails:
-      type: object
-      required:
-        - audio_tokens
-        - cached_tokens
-      properties:
-        audio_tokens:
-          type: integer
-          format: int32
-          description: The number of tokens corresponding to audio input.
-          readOnly: true
-        cached_tokens:
-          type: integer
-          format: int32
-          description: The total number of tokens cached.
-          readOnly: true
-      description: A breakdown of tokens used in the prompt/chat history.
-    StreamingChatChoiceUpdate:
-      type: object
-      required:
-        - index
-        - finish_reason
-        - delta
-      properties:
-        index:
-          type: integer
-          format: int32
-          description: The ordered index associated with this chat completions choice.
-        finish_reason:
-          allOf:
-            - $ref: '#/components/schemas/CompletionsFinishReason'
-          nullable: true
-          description: The reason that this chat completions choice completed its generation.
-          readOnly: true
-        delta:
-          allOf:
-            - $ref: '#/components/schemas/StreamingChatResponseMessageUpdate'
-          description: An update to the chat message for a given chat completions prompt.
-          readOnly: true
-      description: |-
-        Represents an update to a single prompt completion when the service is streaming updates 
-        using Server Sent Events (SSE).
-        Generally, `n` choices are generated per provided prompt with a default value of 1.
-        Token limits and other settings may limit the number of choices generated.
-    StreamingChatCompletionsUpdate:
-      type: object
-      required:
-        - id
-        - object
-        - created
-        - model
-        - choices
-      properties:
-        id:
-          type: string
-          description: A unique identifier associated with this chat completions response.
-        object:
-          type: string
-          enum:
-            - chat.completion
-          description: The response object type, which is always `chat.completion`.
-        created:
-          type: integer
-          format: unixtime
-          description: |-
-            The first timestamp associated with generation activity for this completions response,
-            represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
-          readOnly: true
-        model:
-          type: string
-          description: The model used for the chat completion.
-          readOnly: true
-        choices:
-          type: array
-          items:
-            $ref: '#/components/schemas/StreamingChatChoiceUpdate'
-          minItems: 1
-          description: |-
-            An update to the collection of completion choices associated with this completions response.
-            Generally, `n` choices are generated per provided prompt with a default value of 1.
-            Token limits and other settings may limit the number of choices generated.
-          readOnly: true
-        usage:
-          allOf:
-            - $ref: '#/components/schemas/CompletionsUsage'
-          description: Usage information for tokens processed and generated as part of this completions operation.
-          readOnly: true
-      description: |-
-        Represents a response update to a chat completions request, when the service is streaming updates 
-        using Server Sent Events (SSE).
-        Completions support a wide variety of tasks and generate text that continues from or "completes"
-        provided prompt data.
-    StreamingChatResponseMessageUpdate:
-      type: object
-      properties:
-        role:
-          allOf:
-            - $ref: '#/components/schemas/ChatRole'
-          description: The chat role associated with the message. If present, should always be 'assistant'
-          readOnly: true
-        content:
-          type: string
-          description: The content of the message.
-          readOnly: true
-        reasoning_content:
-          type: string
-          description: The reasoning content the model used for generating the response
-          readOnly: true
-        tool_calls:
-          type: array
-          items:
-            $ref: '#/components/schemas/StreamingChatResponseToolCallUpdate'
-          description: |-
-            The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
-            completions request to resolve as configured.
-          readOnly: true
-      description: A representation of a chat message update as received in a streaming response.
-    StreamingChatResponseToolCallUpdate:
-      type: object
-      required:
-        - id
-        - function
-      properties:
-        id:
-          type: string
-          description: The ID of the tool call.
-          readOnly: true
-        function:
-          allOf:
-            - $ref: '#/components/schemas/FunctionCall'
-          description: Updates to the function call requested by the AI model.
-          readOnly: true
-      description: An update to the function tool call information requested by the AI model.
-    Versions:
-      type: string
-      enum:
-        - 2024-05-01-preview
-        - '2025-04-01'
-        - '2025-05-01'
-        - 2025-05-15-preview
-      description: The AI.Model service versions.
-  securitySchemes:
-    ApiKeyAuth:
-      type: apiKey
-      in: header
-      name: api-key
-    BearerAuth:
-      type: http
-      scheme: Bearer
-    OAuth2Auth:
-      type: oauth2
-      flows:
-        implicit:
-          authorizationUrl: https://login.microsoftonline.com/common/oauth2/v2.0/authorize
-          scopes:
-            https://cognitiveservices.azure.com/.default: ''
-servers:
-  - url: https://{resource}.services.ai.azure.com/api/models
-    description: AI Model Inference
-    variables:
-      resource:
-        default: ''
-        description: The Azure AI Services resource name, for example 'my-resource'
diff --git a/specification/ai/data-plane/ModelInference/openapi/2025-05-01/openapi.yaml b/specification/ai/data-plane/ModelInference/openapi/2025-05-01/openapi.yaml
deleted file mode 100644
index 31e4650200d5..000000000000
--- a/specification/ai/data-plane/ModelInference/openapi/2025-05-01/openapi.yaml
+++ /dev/null
@@ -1,1477 +0,0 @@
-openapi: 3.0.0
-info:
-  title: AI Model Inference
-  version: '2025-05-01'
-tags: []
-paths:
-  /chat/completions:
-    post:
-      operationId: getChatCompletions
-      description: |-
-        Gets chat completions for the provided chat messages.
-        Completions support a wide variety of tasks and generate text that continues from or "completes"
-        provided prompt data. The method makes a REST API call to the `/chat/completions` route
-        on the given endpoint.
-      parameters:
-        - $ref: '#/components/parameters/Azure.Core.Foundations.ApiVersionParameter'
-        - name: extra-parameters
-          in: header
-          required: false
-          description: |-
-            Controls what happens if extra parameters, undefined by the REST API,
-            are passed in the JSON request payload.
-            This sets the HTTP request header `extra-parameters`.
-          schema:
-            $ref: '#/components/schemas/ExtraParameters'
-      responses:
-        '200':
-          description: The request has succeeded.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ChatCompletions'
-        default:
-          description: An unexpected error response.
-          headers:
-            x-ms-error-code:
-              required: false
-              description: String error code indicating what went wrong.
-              schema:
-                type: string
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Azure.Core.Foundations.ErrorResponse'
-      requestBody:
-        required: true
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/ChatCompletionsOptions'
-        description: The parameters of the chat completions request.
-  /embeddings:
-    post:
-      operationId: getEmbeddings
-      description: |-
-        Return the embedding vectors for given text prompts.
-        The method makes a REST API call to the `/embeddings` route on the given endpoint.
-      parameters:
-        - $ref: '#/components/parameters/Azure.Core.Foundations.ApiVersionParameter'
-        - name: extra-parameters
-          in: header
-          required: false
-          description: |-
-            Controls what happens if extra parameters, undefined by the REST API,
-            are passed in the JSON request payload.
-            This sets the HTTP request header `extra-parameters`.
-          schema:
-            $ref: '#/components/schemas/ExtraParameters'
-      responses:
-        '200':
-          description: The request has succeeded.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/EmbeddingsResult'
-        default:
-          description: An unexpected error response.
-          headers:
-            x-ms-error-code:
-              required: false
-              description: String error code indicating what went wrong.
-              schema:
-                type: string
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Azure.Core.Foundations.ErrorResponse'
-      requestBody:
-        required: true
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/EmbeddingsOptions'
-        description: The parameters of the embeddings request.
-  /images/embeddings:
-    post:
-      operationId: getImageEmbeddings
-      description: |-
-        Return the embedding vectors for given images.
-        The method makes a REST API call to the `/images/embeddings` route on the given endpoint.
-      parameters:
-        - $ref: '#/components/parameters/Azure.Core.Foundations.ApiVersionParameter'
-        - name: extra-parameters
-          in: header
-          required: false
-          description: |-
-            Controls what happens if extra parameters, undefined by the REST API,
-            are passed in the JSON request payload.
-            This sets the HTTP request header `extra-parameters`.
-          schema:
-            $ref: '#/components/schemas/ExtraParameters'
-      responses:
-        '200':
-          description: The request has succeeded.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/EmbeddingsResult'
-        default:
-          description: An unexpected error response.
-          headers:
-            x-ms-error-code:
-              required: false
-              description: String error code indicating what went wrong.
-              schema:
-                type: string
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Azure.Core.Foundations.ErrorResponse'
-      requestBody:
-        required: true
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/ImageEmbeddingsOptions'
-        description: The parameters of the image embeddings request.
-  /info:
-    get:
-      operationId: getModelInfo
-      description: |-
-        Returns information about the AI model deployed.
-        The method makes a REST API call to the `/info` route on the given endpoint.
-        This method will only work when using Serverless API, Managed Compute, or Model
-        inference endpoint. Azure OpenAI endpoints don't support it.
-      parameters:
-        - $ref: '#/components/parameters/Azure.Core.Foundations.ApiVersionParameter'
-        - name: model
-          in: query
-          required: false
-          description: The model deployment name you want information from.
-          schema:
-            type: string
-          explode: false
-      responses:
-        '200':
-          description: The request has succeeded.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ModelInfo'
-        default:
-          description: An unexpected error response.
-          headers:
-            x-ms-error-code:
-              required: false
-              description: String error code indicating what went wrong.
-              schema:
-                type: string
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Azure.Core.Foundations.ErrorResponse'
-security:
-  - ApiKeyAuth: []
-  - BearerAuth: []
-  - OAuth2Auth:
-      - https://cognitiveservices.azure.com/.default
-components:
-  parameters:
-    Azure.Core.Foundations.ApiVersionParameter:
-      name: api-version
-      in: query
-      required: true
-      description: The API version to use for this operation.
-      schema:
-        type: string
-        minLength: 1
-      explode: false
-  schemas:
-    AudioContentFormat:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - wav
-            - mp3
-      description: A representation of the possible audio formats for audio.
-    Azure.Core.Foundations.Error:
-      type: object
-      required:
-        - code
-        - message
-      properties:
-        code:
-          type: string
-          description: One of a server-defined set of error codes.
-        message:
-          type: string
-          description: A human-readable representation of the error.
-        target:
-          type: string
-          description: The target of the error.
-        details:
-          type: array
-          items:
-            $ref: '#/components/schemas/Azure.Core.Foundations.Error'
-          description: An array of details about specific errors that led to this reported error.
-        innererror:
-          allOf:
-            - $ref: '#/components/schemas/Azure.Core.Foundations.InnerError'
-          description: An object containing more specific information than the current object about the error.
-      description: The error object.
-    Azure.Core.Foundations.ErrorResponse:
-      type: object
-      required:
-        - error
-      properties:
-        error:
-          allOf:
-            - $ref: '#/components/schemas/Azure.Core.Foundations.Error'
-          description: The error object.
-      description: A response containing error details.
-    Azure.Core.Foundations.InnerError:
-      type: object
-      properties:
-        code:
-          type: string
-          description: One of a server-defined set of error codes.
-        innererror:
-          allOf:
-            - $ref: '#/components/schemas/Azure.Core.Foundations.InnerError'
-          description: Inner error.
-      description: An object containing more specific information about the error. As per Microsoft One API guidelines - https://github.com/microsoft/api-guidelines/blob/vNext/azure/Guidelines.md#handling-errors.
-    ChatChoice:
-      type: object
-      required:
-        - index
-        - finish_reason
-        - message
-      properties:
-        index:
-          type: integer
-          format: int32
-          description: The ordered index associated with this chat completions choice.
-        finish_reason:
-          allOf:
-            - $ref: '#/components/schemas/CompletionsFinishReason'
-          nullable: true
-          description: The reason that this chat completions choice completed its generation.
-          readOnly: true
-        message:
-          allOf:
-            - $ref: '#/components/schemas/ChatResponseMessage'
-          description: The chat message for a given chat completions prompt.
-          readOnly: true
-      description: |-
-        The representation of a single prompt completion as part of an overall chat completions request.
-        Generally, `n` choices are generated per provided prompt with a default value of 1.
-        Token limits and other settings may limit the number of choices generated.
-    ChatCompletions:
-      type: object
-      required:
-        - id
-        - object
-        - created
-        - model
-        - choices
-        - usage
-      properties:
-        id:
-          type: string
-          description: A unique identifier associated with this chat completions response.
-        object:
-          type: string
-          enum:
-            - chat.completion
-          description: The response object type, which is always `chat.completion`.
-        created:
-          type: integer
-          format: unixtime
-          description: |-
-            The first timestamp associated with generation activity for this completions response,
-            represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
-          readOnly: true
-        model:
-          type: string
-          description: The model used for the chat completion.
-          readOnly: true
-        choices:
-          type: array
-          items:
-            $ref: '#/components/schemas/ChatChoice'
-          minItems: 1
-          description: |-
-            The collection of completions choices associated with this completions response.
-            Generally, `n` choices are generated per provided prompt with a default value of 1.
-            Token limits and other settings may limit the number of choices generated.
-          readOnly: true
-        usage:
-          allOf:
-            - $ref: '#/components/schemas/CompletionsUsage'
-          description: 'Usage information for tokens processed and generated as part of this completions operation.'
-          readOnly: true
-      description: |-
-        Representation of the response data from a chat completions request.
-        Completions support a wide variety of tasks and generate text that continues from or "completes"
-        provided prompt data.
-    ChatCompletionsAudio:
-      type: object
-      required:
-        - id
-        - expires_at
-        - data
-        - transcript
-      properties:
-        id:
-          type: string
-          description: |2-
-              Unique identifier for the audio response. This value can be used in chat history messages instead of passing 
-              the full audio object.
-          readOnly: true
-        expires_at:
-          type: integer
-          format: unixtime
-          description: |-
-            The Unix timestamp (in seconds) at which the audio piece expires and can no longer be referenced by its ID in
-            multi-turn conversations.
-          readOnly: true
-        data:
-          type: string
-          description: Base64 encoded audio data
-          readOnly: true
-        format:
-          allOf:
-            - $ref: '#/components/schemas/AudioContentFormat'
-          description: |-
-            The format of the audio content. If format is not provided, it will match the format used in the
-            input audio request.
-          readOnly: true
-        transcript:
-          type: string
-          description: The transcript of the audio file.
-          readOnly: true
-      description: A representation of the audio generated by the model.
-    ChatCompletionsModality:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - text
-            - audio
-      description: The modalities that the model is allowed to use for the chat completions response.
-    ChatCompletionsNamedToolChoice:
-      type: object
-      required:
-        - type
-        - function
-      properties:
-        type:
-          type: string
-          enum:
-            - function
-          description: The type of the tool. Currently, only `function` is supported.
-        function:
-          allOf:
-            - $ref: '#/components/schemas/ChatCompletionsNamedToolChoiceFunction'
-          description: The function that should be called.
-      description: A tool selection of a specific, named function tool that will limit chat completions to using the named function.
-    ChatCompletionsNamedToolChoiceFunction:
-      type: object
-      required:
-        - name
-      properties:
-        name:
-          type: string
-          description: The name of the function that should be called.
-      description: A tool selection of a specific, named function tool that will limit chat completions to using the named function.
-    ChatCompletionsOptions:
-      type: object
-      required:
-        - messages
-      properties:
-        messages:
-          type: array
-          items:
-            $ref: '#/components/schemas/ChatRequestMessage'
-          minItems: 1
-          description: |-
-            The collection of context messages associated with this chat completions request.
-            Typical usage begins with a chat message for the System role that provides instructions for
-            the behavior of the assistant, followed by alternating messages between the User and
-            Assistant roles.
-        frequency_penalty:
-          type: number
-          format: float
-          minimum: -2
-          maximum: 2
-          description: |-
-            A value that influences the probability of generated tokens appearing based on their cumulative
-            frequency in generated text.
-            Positive values will make tokens less likely to appear as their frequency increases and
-            decrease the likelihood of the model repeating the same statements verbatim.
-            Supported range is [-2, 2].
-          default: 0
-        stream:
-          type: boolean
-          description: A value indicating whether chat completions should be streamed for this request.
-        presence_penalty:
-          type: number
-          format: float
-          minimum: -2
-          maximum: 2
-          description: |-
-            A value that influences the probability of generated tokens appearing based on their existing
-            presence in generated text.
-            Positive values will make tokens less likely to appear when they already exist and increase the
-            model's likelihood to output new topics.
-            Supported range is [-2, 2].
-          default: 0
-        temperature:
-          type: number
-          format: float
-          minimum: 0
-          maximum: 1
-          description: |-
-            The sampling temperature to use that controls the apparent creativity of generated completions.
-            Higher values will make output more random while lower values will make results more focused
-            and deterministic.
-            It is not recommended to modify temperature and top_p for the same completions request as the
-            interaction of these two settings is difficult to predict.
-            Supported range is [0, 1].
-          default: 0.7
-        top_p:
-          type: number
-          format: float
-          minimum: 0
-          maximum: 1
-          description: |-
-            An alternative to sampling with temperature called nucleus sampling. This value causes the
-            model to consider the results of tokens with the provided probability mass. As an example, a
-            value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
-            considered.
-            It is not recommended to modify temperature and top_p for the same completions request as the
-            interaction of these two settings is difficult to predict.
-            Supported range is [0, 1].
-          default: 1
-        max_tokens:
-          type: integer
-          format: int32
-          minimum: 0
-          description: The maximum number of tokens to generate.
-        response_format:
-          allOf:
-            - $ref: '#/components/schemas/ChatCompletionsResponseFormat'
-          description: |-
-            An object specifying the format that the model must output.
-
-            Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.
-
-            Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON.
-
-            **Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length.
-        stop:
-          type: array
-          items:
-            type: string
-          minItems: 1
-          description: A collection of textual sequences that will end completions generation.
-        tools:
-          type: array
-          items:
-            $ref: '#/components/schemas/ChatCompletionsToolDefinition'
-          minItems: 1
-          description: |-
-            A list of tools the model may request to call. Currently, only functions are supported as a tool. The model
-            may respond with a function call request and provide the input arguments in JSON format for that function.
-        tool_choice:
-          anyOf:
-            - $ref: '#/components/schemas/ChatCompletionsToolChoicePreset'
-            - $ref: '#/components/schemas/ChatCompletionsNamedToolChoice'
-          description: If specified, the model will configure which of the provided tools it can use for the chat completions response.
-        seed:
-          type: integer
-          format: int64
-          description: |-
-            If specified, the system will make a best effort to sample deterministically such that repeated requests with the
-            same seed and parameters should return the same result. Determinism is not guaranteed.
-        model:
-          type: string
-          description: ID of the specific AI model to use, if more than one model is available on the endpoint.
-        modalities:
-          type: array
-          items:
-            $ref: '#/components/schemas/ChatCompletionsModality'
-          description: |-
-            The modalities that the model is allowed to use for the chat completions response. The default modality
-            is `text`. Indicating an unsupported modality combination results in a 422 error.
-        user_security_context:
-          allOf:
-            - $ref: '#/components/schemas/UserSecurityContext'
-          description: |-
-            User security context contains several parameters that describe the AI application itself, and the end user that interacts with the AI application. 
-            These fields assist your security operations teams to investigate and mitigate security incidents by providing a comprehensive approach to protecting your AI applications. 
-            [Learn more](https://aka.ms/TP4AI/Documentation/EndUserContext) about protecting AI applications using Microsoft Defender for Cloud.
-      additionalProperties: {}
-      description: |-
-        The configuration information for a chat completions request.
-        Completions support a wide variety of tasks and generate text that continues from or "completes"
-        provided prompt data.
-    ChatCompletionsResponseFormat:
-      type: object
-      required:
-        - type
-      properties:
-        type:
-          type: string
-          description: The response format type to use for chat completions.
-      discriminator:
-        propertyName: type
-        mapping:
-          text: '#/components/schemas/ChatCompletionsResponseFormatText'
-          json_object: '#/components/schemas/ChatCompletionsResponseFormatJsonObject'
-          json_schema: '#/components/schemas/ChatCompletionsResponseFormatJsonSchema'
-      description: |-
-        Represents the format that the model must output. Use this to enable JSON mode instead of the default text mode.
-        Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
-        via a system or user message.
-    ChatCompletionsResponseFormatJsonObject:
-      type: object
-      required:
-        - type
-      properties:
-        type:
-          type: string
-          enum:
-            - json_object
-          description: "Response format type: always 'json_object' for this object."
-      allOf:
-        - $ref: '#/components/schemas/ChatCompletionsResponseFormat'
-      description: |-
-        A response format for Chat Completions that restricts responses to emitting valid JSON objects.
-        Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
-        via a system or user message.
-    ChatCompletionsResponseFormatJsonSchema:
-      type: object
-      required:
-        - type
-        - json_schema
-      properties:
-        type:
-          type: string
-          enum:
-            - json_schema
-          description: 'The type of response format being defined: `json_schema`'
-        json_schema:
-          allOf:
-            - $ref: '#/components/schemas/ChatCompletionsResponseFormatJsonSchemaDefinition'
-          description: The definition of the required JSON schema in the response, and associated metadata.
-      allOf:
-        - $ref: '#/components/schemas/ChatCompletionsResponseFormat'
-      description: |-
-        A response format for Chat Completions that restricts responses to emitting valid JSON objects, with a
-        JSON schema specified by the caller.
-    ChatCompletionsResponseFormatJsonSchemaDefinition:
-      type: object
-      required:
-        - name
-        - schema
-      properties:
-        name:
-          type: string
-          description: The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
-        schema:
-          type: object
-          additionalProperties: {}
-          description: The definition of the JSON schema
-        description:
-          type: string
-          description: A description of the response format, used by the AI model to determine how to generate responses in this format.
-        strict:
-          type: boolean
-          description: |-
-            Whether to enable strict schema adherence when generating the output.
-            If set to true, the model will always follow the exact schema defined in the `schema` field. Only a subset of
-            JSON Schema is supported when `strict` is `true`.
-          default: false
-      description: The definition of the required JSON schema in the response, and associated metadata.
-    ChatCompletionsResponseFormatText:
-      type: object
-      required:
-        - type
-      properties:
-        type:
-          type: string
-          enum:
-            - text
-          description: "Response format type: always 'text' for this object."
-      allOf:
-        - $ref: '#/components/schemas/ChatCompletionsResponseFormat'
-      description: A response format for Chat Completions that emits text responses. This is the default response format.
-    ChatCompletionsToolCall:
-      type: object
-      required:
-        - id
-        - type
-        - function
-      properties:
-        id:
-          type: string
-          description: The ID of the tool call.
-        type:
-          type: string
-          enum:
-            - function
-          description: The type of tool call. Currently, only `function` is supported.
-        function:
-          allOf:
-            - $ref: '#/components/schemas/FunctionCall'
-          description: The details of the function call requested by the AI model.
-      description: A function tool call requested by the AI model.
-    ChatCompletionsToolChoicePreset:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - auto
-            - none
-            - required
-      description: Represents a generic policy for how a chat completions tool may be selected.
-    ChatCompletionsToolDefinition:
-      type: object
-      required:
-        - type
-        - function
-      properties:
-        type:
-          type: string
-          enum:
-            - function
-          description: The type of the tool. Currently, only `function` is supported.
-        function:
-          allOf:
-            - $ref: '#/components/schemas/FunctionDefinition'
-          description: The function definition details for the function tool.
-      description: The definition of a chat completions tool that can call a function.
-    ChatMessageAudioContentItem:
-      type: object
-      required:
-        - type
-        - audio_url
-      properties:
-        type:
-          type: string
-          enum:
-            - audio_url
-          description: "The discriminated object type: always 'audio_url' for this type."
-        audio_url:
-          allOf:
-            - $ref: '#/components/schemas/ChatMessageAudioUrl'
-          description: An internet location, which must be accessible to the model, from which the audio may be retrieved.
-      allOf:
-        - $ref: '#/components/schemas/ChatMessageContentItem'
-      description: A structured chat content item containing an audio reference.
-    ChatMessageAudioUrl:
-      type: object
-      required:
-        - url
-      properties:
-        url:
-          type: string
-          description: The URL of the audio.
-      description: An internet location from which the model may retrieve audio.
-    ChatMessageContentItem:
-      type: object
-      required:
-        - type
-      properties:
-        type:
-          type: string
-          description: The discriminated object type.
-      discriminator:
-        propertyName: type
-        mapping:
-          text: '#/components/schemas/ChatMessageTextContentItem'
-          image_url: '#/components/schemas/ChatMessageImageContentItem'
-          audio_url: '#/components/schemas/ChatMessageAudioContentItem'
-          input_audio: '#/components/schemas/ChatMessageInputAudioContentItem'
-      description: An abstract representation of a structured content item within a chat message.
-    ChatMessageImageContentItem:
-      type: object
-      required:
-        - type
-        - image_url
-      properties:
-        type:
-          type: string
-          enum:
-            - image_url
-          description: "The discriminated object type: always 'image_url' for this type."
-        image_url:
-          allOf:
-            - $ref: '#/components/schemas/ChatMessageImageUrl'
-          description: An internet location, which must be accessible to the model, from which the image may be retrieved.
-      allOf:
-        - $ref: '#/components/schemas/ChatMessageContentItem'
-      description: A structured chat content item containing an image reference.
-    ChatMessageImageDetailLevel:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - auto
-            - low
-            - high
-      description: A representation of the possible image detail levels for image-based chat completions message content.
-    ChatMessageImageUrl:
-      type: object
-      required:
-        - url
-      properties:
-        url:
-          type: string
-          description: The URL of the image.
-        detail:
-          allOf:
-            - $ref: '#/components/schemas/ChatMessageImageDetailLevel'
-          description: |-
-            The evaluation quality setting to use, which controls relative prioritization of speed, token consumption, and
-            accuracy.
-      description: An internet location from which the model may retrieve an image.
-    ChatMessageInputAudio:
-      type: object
-      required:
-        - data
-        - format
-      properties:
-        data:
-          type: string
-          description: Base64 encoded audio data
-        format:
-          allOf:
-            - $ref: '#/components/schemas/AudioContentFormat'
-          description: The audio format of the audio content.
-      description: The details of an audio chat message content part.
-    ChatMessageInputAudioContentItem:
-      type: object
-      required:
-        - type
-        - format
-      properties:
-        type:
-          type: string
-          enum:
-            - input_audio
-          description: "The discriminated object type: always 'input_audio' for this type."
-        format:
-          allOf:
-            - $ref: '#/components/schemas/AudioContentFormat'
-          description: The audio format of the audio reference.
-      allOf:
-        - $ref: '#/components/schemas/ChatMessageContentItem'
-      description: A structured chat content item containing audio content.
-    ChatMessageTextContentItem:
-      type: object
-      required:
-        - type
-        - text
-      properties:
-        type:
-          type: string
-          enum:
-            - text
-          description: "The discriminated object type: always 'text' for this type."
-        text:
-          type: string
-          description: The content of the message.
-      allOf:
-        - $ref: '#/components/schemas/ChatMessageContentItem'
-      description: A structured chat content item containing plain text.
-    ChatRequestAssistantMessage:
-      type: object
-      required:
-        - role
-      properties:
-        role:
-          type: string
-          enum:
-            - assistant
-          description: The chat role associated with this message, which is always 'assistant' for assistant messages.
-        content:
-          type: string
-          description: The content of the message.
-        tool_calls:
-          type: array
-          items:
-            $ref: '#/components/schemas/ChatCompletionsToolCall'
-          description: |-
-            The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
-            completions request to resolve as configured.
-        audio:
-          allOf:
-            - $ref: '#/components/schemas/ChatRequestAudioReference'
-          description: 'The audio generated by a previous response in a multi-turn conversation.'
-      allOf:
-        - $ref: '#/components/schemas/ChatRequestMessage'
-      description: A request chat message representing response or action from the assistant.
-    ChatRequestAudioReference:
-      type: object
-      required:
-        - id
-      properties:
-        id:
-          type: string
-          description: 'Unique identifier for the audio response. This value corresponds to the id of a previous audio completion.'
-      description: A reference to an audio response generated by the model.
-    ChatRequestMessage:
-      type: object
-      required:
-        - role
-      properties:
-        role:
-          allOf:
-            - $ref: '#/components/schemas/ChatRole'
-          description: The chat role associated with this message.
-      discriminator:
-        propertyName: role
-        mapping:
-          system: '#/components/schemas/ChatRequestSystemMessage'
-          user: '#/components/schemas/ChatRequestUserMessage'
-          assistant: '#/components/schemas/ChatRequestAssistantMessage'
-          tool: '#/components/schemas/ChatRequestToolMessage'
-      description: An abstract representation of a chat message as provided in a request.
-    ChatRequestSystemMessage:
-      type: object
-      required:
-        - role
-        - content
-      properties:
-        role:
-          type: string
-          enum:
-            - system
-          description: The chat role associated with this message, which is always 'system' for system messages.
-        content:
-          type: string
-          description: The contents of the system message.
-      allOf:
-        - $ref: '#/components/schemas/ChatRequestMessage'
-      description: |-
-        A request chat message containing system instructions that influence how the model will generate a chat completions
-        response.
-    ChatRequestToolMessage:
-      type: object
-      required:
-        - role
-        - tool_call_id
-      properties:
-        role:
-          type: string
-          enum:
-            - tool
-          description: The chat role associated with this message, which is always 'tool' for tool messages.
-        content:
-          type: string
-          description: The content of the message.
-        tool_call_id:
-          type: string
-          description: The ID of the tool call resolved by the provided content.
-      allOf:
-        - $ref: '#/components/schemas/ChatRequestMessage'
-      description: A request chat message representing requested output from a configured tool.
-    ChatRequestUserMessage:
-      type: object
-      required:
-        - role
-        - content
-      properties:
-        role:
-          type: string
-          enum:
-            - user
-          description: The chat role associated with this message, which is always 'user' for user messages.
-        content:
-          anyOf:
-            - type: string
-            - type: array
-              items:
-                $ref: '#/components/schemas/ChatMessageContentItem'
-          description: The contents of the user message, with available input types varying by selected model.
-      allOf:
-        - $ref: '#/components/schemas/ChatRequestMessage'
-      description: A request chat message representing user input to the assistant.
-    ChatResponseMessage:
-      type: object
-      required:
-        - role
-        - content
-      properties:
-        role:
-          allOf:
-            - $ref: '#/components/schemas/ChatRole'
-          description: The chat role associated with the message.
-          readOnly: true
-        content:
-          type: string
-          nullable: true
-          description: The content of the message.
-          readOnly: true
-        reasoning_content:
-          type: string
-          description: The reasoning content the model used for generating the response
-          readOnly: true
-        tool_calls:
-          type: array
-          items:
-            $ref: '#/components/schemas/ChatCompletionsToolCall'
-          description: |-
-            The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
-            completions request to resolve as configured.
-          readOnly: true
-        audio:
-          allOf:
-            - $ref: '#/components/schemas/ChatCompletionsAudio'
-          description: 'The audio generated by the model as a response to the messages if the model is configured to generate audio.'
-          readOnly: true
-      description: A representation of a chat message as received in a response.
-    ChatRole:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - system
-            - developer
-            - user
-            - assistant
-            - tool
-      description: A description of the intended purpose of a message within a chat completions interaction.
-    CompletionsFinishReason:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - stop
-            - length
-            - content_filter
-            - tool_calls
-      description: Representation of the manner in which a completions response concluded.
-    CompletionsUsage:
-      type: object
-      required:
-        - completion_tokens
-        - prompt_tokens
-        - total_tokens
-      properties:
-        completion_tokens:
-          type: integer
-          format: int32
-          description: The number of tokens generated across all completions emissions.
-          readOnly: true
-        prompt_tokens:
-          type: integer
-          format: int32
-          description: The number of tokens in the provided prompts for the completions request.
-          readOnly: true
-        total_tokens:
-          type: integer
-          format: int32
-          description: The total number of tokens processed for the completions request and response.
-          readOnly: true
-        completion_tokens_details:
-          allOf:
-            - $ref: '#/components/schemas/CompletionsUsageDetails'
-          description: Breakdown of tokens used in a completion.
-          readOnly: true
-        prompt_tokens_details:
-          allOf:
-            - $ref: '#/components/schemas/PromptUsageDetails'
-          description: Breakdown of tokens used in the prompt/chat history.
-          readOnly: true
-      description: |-
-        Representation of the token counts processed for a completions request.
-        Counts consider all tokens across prompts, choices, choice alternates, best_of generations, and
-        other consumers.
-    CompletionsUsageDetails:
-      type: object
-      required:
-        - audio_tokens
-        - reasoning_tokens
-        - total_tokens
-      properties:
-        audio_tokens:
-          type: integer
-          format: int32
-          description: The number of tokens corresponding to audio input.
-          readOnly: true
-        reasoning_tokens:
-          type: integer
-          format: int32
-          description: The number of tokens corresponding to reasoning.
-          readOnly: true
-        total_tokens:
-          type: integer
-          format: int32
-          description: The total number of tokens processed for the completions request and response.
-          readOnly: true
-      description: A breakdown of tokens used in a completion.
-    EmbeddingEncodingFormat:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - base64
-            - binary
-            - float
-            - int8
-            - ubinary
-            - uint8
-      description: |-
-        Specifies the types of embeddings to generate. Compressed embeddings types like `uint8`, `int8`, `ubinary` and 
-        `binary`, may reduce storage costs without sacrificing the integrity of the data. Returns a 422 error if the
-        model doesn't support the value or parameter. Read the model's documentation to know the values supported by
-        the your model.
-    EmbeddingInputType:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - text
-            - query
-            - document
-      description: Represents the input types used for embedding search.
-    EmbeddingItem:
-      type: object
-      required:
-        - embedding
-        - index
-        - object
-      properties:
-        embedding:
-          type: array
-          items:
-            type: number
-            format: float
-          description: |-
-            List of embedding values for the input prompt. These represent a measurement of the
-            vector-based relatedness of the provided input. Or a base64 encoded string of the embedding vector.
-          readOnly: true
-        index:
-          type: integer
-          format: int32
-          description: Index of the prompt to which the EmbeddingItem corresponds.
-          readOnly: true
-        object:
-          type: string
-          enum:
-            - embedding
-          description: The object type of this embeddings item. Will always be `embedding`.
-      description: Representation of a single embeddings relatedness comparison.
-    EmbeddingsOptions:
-      type: object
-      required:
-        - input
-      properties:
-        input:
-          type: array
-          items:
-            type: string
-          description: |-
-            Input text to embed, encoded as a string or array of tokens.
-            To embed multiple inputs in a single request, pass an array
-            of strings or array of token arrays.
-        dimensions:
-          type: integer
-          format: int32
-          description: |-
-            Optional. The number of dimensions the resulting output embeddings should have.
-            Passing null causes the model to use its default value.
-            Returns a 422 error if the model doesn't support the value or parameter.
-        encoding_format:
-          allOf:
-            - $ref: '#/components/schemas/EmbeddingEncodingFormat'
-          description: Optional. The desired format for the returned embeddings.
-        input_type:
-          allOf:
-            - $ref: '#/components/schemas/EmbeddingInputType'
-          description: |-
-            Optional. The type of the input.
-            Returns a 422 error if the model doesn't support the value or parameter.
-        model:
-          type: string
-          description: ID of the specific AI model to use, if more than one model is available on the endpoint.
-      additionalProperties: {}
-      description: The configuration information for an embeddings request.
-    EmbeddingsResult:
-      type: object
-      required:
-        - id
-        - data
-        - usage
-        - object
-        - model
-      properties:
-        id:
-          type: string
-          description: Unique identifier for the embeddings result.
-          readOnly: true
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/EmbeddingItem'
-          description: Embedding values for the prompts submitted in the request.
-          readOnly: true
-        usage:
-          allOf:
-            - $ref: '#/components/schemas/EmbeddingsUsage'
-          description: Usage counts for tokens input using the embeddings API.
-          readOnly: true
-        object:
-          type: string
-          enum:
-            - list
-          description: The object type of the embeddings result. Will always be `list`.
-        model:
-          type: string
-          description: The model ID used to generate this result.
-          readOnly: true
-      description: |-
-        Representation of the response data from an embeddings request.
-        Embeddings measure the relatedness of text strings and are commonly used for search, clustering,
-        recommendations, and other similar scenarios.
-    EmbeddingsUsage:
-      type: object
-      required:
-        - prompt_tokens
-        - total_tokens
-      properties:
-        prompt_tokens:
-          type: integer
-          format: int32
-          description: Number of tokens in the request.
-          readOnly: true
-        total_tokens:
-          type: integer
-          format: int32
-          description: |-
-            Total number of tokens transacted in this request/response. Should equal the
-            number of tokens in the request.
-          readOnly: true
-      description: Measurement of the amount of tokens used in this request and response.
-    ExtraParameters:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - error
-            - drop
-            - pass-through
-      description: Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload.
-    FunctionCall:
-      type: object
-      required:
-        - name
-        - arguments
-      properties:
-        name:
-          type: string
-          description: The name of the function to call.
-          readOnly: true
-        arguments:
-          type: string
-          description: |-
-            The arguments to call the function with, as generated by the model in JSON format.
-            Note that the model does not always generate valid JSON, and may hallucinate parameters
-            not defined by your function schema. Validate the arguments in your code before calling
-            your function.
-          readOnly: true
-      description: The name and arguments of a function that should be called, as generated by the model.
-    FunctionDefinition:
-      type: object
-      required:
-        - name
-      properties:
-        name:
-          type: string
-          description: The name of the function to be called.
-        description:
-          type: string
-          description: |-
-            A description of what the function does. The model will use this description when selecting the function and
-            interpreting its parameters.
-        parameters:
-          type: object
-          additionalProperties: {}
-          description: The parameters the function accepts, described as a JSON Schema object.
-      description: The definition of a caller-specified function that chat completions may invoke in response to matching user input.
-    ImageEmbeddingInput:
-      type: object
-      required:
-        - image
-      properties:
-        image:
-          type: string
-          description: 'The input image encoded in base64 string as a data URL. Example: `data:image/{format};base64,{data}`.'
-        text:
-          type: string
-          description: |-
-            Optional. The text input to feed into the model (like DINO, CLIP).
-            Returns a 422 error if the model doesn't support the value or parameter.
-      description: Represents an image with optional text.
-    ImageEmbeddingsOptions:
-      type: object
-      required:
-        - input
-      properties:
-        input:
-          type: array
-          items:
-            $ref: '#/components/schemas/ImageEmbeddingInput'
-          description: |-
-            Input image to embed. To embed multiple inputs in a single request, pass an array.
-            The input must not exceed the max input tokens for the model.
-        dimensions:
-          type: integer
-          format: int32
-          description: |-
-            Optional. The number of dimensions the resulting output embeddings should have.
-            Passing null causes the model to use its default value.
-            Returns a 422 error if the model doesn't support the value or parameter.
-        encoding_format:
-          allOf:
-            - $ref: '#/components/schemas/EmbeddingEncodingFormat'
-          description: |-
-            Optional. The number of dimensions the resulting output embeddings should have.
-            Passing null causes the model to use its default value.
-            Returns a 422 error if the model doesn't support the value or parameter.
-        input_type:
-          allOf:
-            - $ref: '#/components/schemas/EmbeddingInputType'
-          description: |-
-            Optional. The type of the input.
-            Returns a 422 error if the model doesn't support the value or parameter.
-        model:
-          type: string
-          description: ID of the specific AI model to use, if more than one model is available on the endpoint.
-      additionalProperties: {}
-      description: The configuration information for an image embeddings request.
-    ModelInfo:
-      type: object
-      required:
-        - model_name
-        - model_type
-        - model_provider_name
-      properties:
-        model_name:
-          type: string
-          description: 'The name of the AI model. For example: `Phi21`'
-          readOnly: true
-        model_type:
-          allOf:
-            - $ref: '#/components/schemas/ModelType'
-          description: The type of the AI model. A Unique identifier for the profile.
-          readOnly: true
-        model_provider_name:
-          type: string
-          description: 'The model provider name. For example: `Microsoft`'
-          readOnly: true
-      description: Represents some basic information about the AI model.
-    ModelType:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - embeddings
-            - chat-completion
-      description: The type of AI model
-    PromptUsageDetails:
-      type: object
-      required:
-        - audio_tokens
-        - cached_tokens
-      properties:
-        audio_tokens:
-          type: integer
-          format: int32
-          description: The number of tokens corresponding to audio input.
-          readOnly: true
-        cached_tokens:
-          type: integer
-          format: int32
-          description: The total number of tokens cached.
-          readOnly: true
-      description: A breakdown of tokens used in the prompt/chat history.
-    StreamingChatChoiceUpdate:
-      type: object
-      required:
-        - index
-        - finish_reason
-        - delta
-      properties:
-        index:
-          type: integer
-          format: int32
-          description: The ordered index associated with this chat completions choice.
-        finish_reason:
-          allOf:
-            - $ref: '#/components/schemas/CompletionsFinishReason'
-          nullable: true
-          description: The reason that this chat completions choice completed its generated.
-          readOnly: true
-        delta:
-          allOf:
-            - $ref: '#/components/schemas/StreamingChatResponseMessageUpdate'
-          description: An update to the chat message for a given chat completions prompt.
-          readOnly: true
-      description: |-
-        Represents an update to a single prompt completion when the service is streaming updates 
-        using Server Sent Events (SSE).
-        Generally, `n` choices are generated per provided prompt with a default value of 1.
-        Token limits and other settings may limit the number of choices generated.
-    StreamingChatCompletionsUpdate:
-      type: object
-      required:
-        - id
-        - object
-        - created
-        - model
-        - choices
-      properties:
-        id:
-          type: string
-          description: A unique identifier associated with this chat completions response.
-        object:
-          type: string
-          enum:
-            - chat.completion
-          description: The response object type, which is always `chat.completion`.
-        created:
-          type: integer
-          format: unixtime
-          description: |-
-            The first timestamp associated with generation activity for this completions response,
-            represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
-          readOnly: true
-        model:
-          type: string
-          description: The model used for the chat completion.
-          readOnly: true
-        choices:
-          type: array
-          items:
-            $ref: '#/components/schemas/StreamingChatChoiceUpdate'
-          minItems: 1
-          description: |-
-            An update to the collection of completion choices associated with this completions response.
-            Generally, `n` choices are generated per provided prompt with a default value of 1.
-            Token limits and other settings may limit the number of choices generated.
-          readOnly: true
-        usage:
-          allOf:
-            - $ref: '#/components/schemas/CompletionsUsage'
-          description: Usage information for tokens processed and generated as part of this completions operation.
-          readOnly: true
-      description: |-
-        Represents a response update to a chat completions request, when the service is streaming updates 
-        using Server Sent Events (SSE).
-        Completions support a wide variety of tasks and generate text that continues from or "completes"
-        provided prompt data.
-    StreamingChatResponseMessageUpdate:
-      type: object
-      properties:
-        role:
-          allOf:
-            - $ref: '#/components/schemas/ChatRole'
-          description: The chat role associated with the message. If present, should always be 'assistant'
-          readOnly: true
-        content:
-          type: string
-          description: The content of the message.
-          readOnly: true
-        reasoning_content:
-          type: string
-          description: The reasoning content the model used for generating the response
-          readOnly: true
-        tool_calls:
-          type: array
-          items:
-            $ref: '#/components/schemas/StreamingChatResponseToolCallUpdate'
-          description: |-
-            The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
-            completions request to resolve as configured.
-          readOnly: true
-      description: A representation of a chat message update as received in a streaming response.
-    StreamingChatResponseToolCallUpdate:
-      type: object
-      required:
-        - id
-        - function
-      properties:
-        id:
-          type: string
-          description: The ID of the tool call.
-          readOnly: true
-        function:
-          allOf:
-            - $ref: '#/components/schemas/FunctionCall'
-          description: Updates to the function call requested by the AI model.
-          readOnly: true
-      description: An update to the function tool call information requested by the AI model.
-    UserSecurityContext:
-      type: object
-      properties:
-        application_name:
-          type: string
-          maxLength: 100
-          description: The name of the application. Sensitive personal information should not be included in this field.
-        end_user_id:
-          type: string
-          minLength: 36
-          maxLength: 36
-          pattern: ^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$
-          description: This identifier is the Microsoft Entra ID (formerly Azure Active Directory) user object ID used to authenticate end-users within the generative AI application. Sensitive personal information should not be included in this field.
-        end_user_tenant_id:
-          type: string
-          minLength: 36
-          maxLength: 36
-          pattern: ^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$
-          description: The Microsoft 365 tenant ID the end user belongs to. It's required when the generative AI application is multi tenant.
-        source_ip:
-          type: string
-          minLength: 2
-          maxLength: 45
-          description: Captures the original client's IP address, accepting both IPv4 and IPv6 formats.
-      description: |-
-        User security context contains several parameters that describe the AI application itself, and the end user that interacts with the AI application. 
-        These fields assist your security operations teams to investigate and mitigate security incidents by providing a comprehensive approach to protecting your AI applications. 
-        [Learn more](https://aka.ms/TP4AI/Documentation/EndUserContext) about protecting AI applications using Microsoft Defender for Cloud.
-    Versions:
-      type: string
-      enum:
-        - 2024-05-01-preview
-        - '2025-04-01'
-        - '2025-05-01'
-        - 2025-05-15-preview
-      description: The AI.Model service versions.
-  securitySchemes:
-    ApiKeyAuth:
-      type: apiKey
-      in: header
-      name: api-key
-    BearerAuth:
-      type: http
-      scheme: Bearer
-    OAuth2Auth:
-      type: oauth2
-      flows:
-        implicit:
-          authorizationUrl: https://login.microsoftonline.com/common/oauth2/v2.0/authorize
-          scopes:
-            https://cognitiveservices.azure.com/.default: ''
-servers:
-  - url: https://{resource}.services.ai.azure.com/api/models
-    description: AI Model Inference
-    variables:
-      resource:
-        default: ''
-        description: The Azure AI Services resource name, for example 'my-resource'
diff --git a/specification/ai/data-plane/ModelInference/openapi/2025-05-15-preview/openapi.yaml b/specification/ai/data-plane/ModelInference/openapi/2025-05-15-preview/openapi.yaml
deleted file mode 100644
index 9a98b6d6438c..000000000000
--- a/specification/ai/data-plane/ModelInference/openapi/2025-05-15-preview/openapi.yaml
+++ /dev/null
@@ -1,1701 +0,0 @@
-openapi: 3.0.0
-info:
-  title: AI Model Inference
-  version: 2025-05-15-preview
-tags: []
-paths:
-  /chat/completions:
-    post:
-      operationId: getChatCompletions
-      description: |-
-        Gets chat completions for the provided chat messages.
-        Completions support a wide variety of tasks and generate text that continues from or "completes"
-        provided prompt data. The method makes a REST API call to the `/chat/completions` route
-        on the given endpoint.
-      parameters:
-        - $ref: '#/components/parameters/Azure.Core.Foundations.ApiVersionParameter'
-        - name: extra-parameters
-          in: header
-          required: false
-          description: |-
-            Controls what happens if extra parameters, undefined by the REST API,
-            are passed in the JSON request payload.
-            This sets the HTTP request header `extra-parameters`.
-          schema:
-            $ref: '#/components/schemas/ExtraParameters'
-      responses:
-        '200':
-          description: The request has succeeded.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ChatCompletions'
-        default:
-          description: An unexpected error response.
-          headers:
-            x-ms-error-code:
-              required: false
-              description: String error code indicating what went wrong.
-              schema:
-                type: string
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Azure.Core.Foundations.ErrorResponse'
-      requestBody:
-        required: true
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/ChatCompletionsOptions'
-        description: The parameters of the chat completions request.
-  /embeddings:
-    post:
-      operationId: getEmbeddings
-      description: |-
-        Return the embedding vectors for given text prompts.
-        The method makes a REST API call to the `/embeddings` route on the given endpoint.
-      parameters:
-        - $ref: '#/components/parameters/Azure.Core.Foundations.ApiVersionParameter'
-        - name: extra-parameters
-          in: header
-          required: false
-          description: |-
-            Controls what happens if extra parameters, undefined by the REST API,
-            are passed in the JSON request payload.
-            This sets the HTTP request header `extra-parameters`.
-          schema:
-            $ref: '#/components/schemas/ExtraParameters'
-      responses:
-        '200':
-          description: The request has succeeded.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/EmbeddingsResult'
-        default:
-          description: An unexpected error response.
-          headers:
-            x-ms-error-code:
-              required: false
-              description: String error code indicating what went wrong.
-              schema:
-                type: string
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Azure.Core.Foundations.ErrorResponse'
-      requestBody:
-        required: true
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/EmbeddingsOptions'
-        description: The parameters of the embeddings request.
-  /images/embeddings:
-    post:
-      operationId: getImageEmbeddings
-      description: |-
-        Return the embedding vectors for given images.
-        The method makes a REST API call to the `/images/embeddings` route on the given endpoint.
-      parameters:
-        - $ref: '#/components/parameters/Azure.Core.Foundations.ApiVersionParameter'
-        - name: extra-parameters
-          in: header
-          required: false
-          description: |-
-            Controls what happens if extra parameters, undefined by the REST API,
-            are passed in the JSON request payload.
-            This sets the HTTP request header `extra-parameters`.
-          schema:
-            $ref: '#/components/schemas/ExtraParameters'
-      responses:
-        '200':
-          description: The request has succeeded.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/EmbeddingsResult'
-        default:
-          description: An unexpected error response.
-          headers:
-            x-ms-error-code:
-              required: false
-              description: String error code indicating what went wrong.
-              schema:
-                type: string
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Azure.Core.Foundations.ErrorResponse'
-      requestBody:
-        required: true
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/ImageEmbeddingsOptions'
-        description: The parameters of the image embeddings request.
-  /images/generations:
-    post:
-      operationId: getImageGenerations
-      description: |-
-        Generates an image based on a text or image prompt.
-        The method makes a REST API call to the `/images/generations` route on the given endpoint.
-      parameters:
-        - $ref: '#/components/parameters/Azure.Core.Foundations.ApiVersionParameter'
-        - name: extra-parameters
-          in: header
-          required: false
-          description: |-
-            Controls what happens if extra parameters, undefined by the REST API,
-            are passed in the JSON request payload.
-            This sets the HTTP request header `extra-parameters`.
-          schema:
-            $ref: '#/components/schemas/ExtraParameters'
-      responses:
-        '200':
-          description: The request has succeeded.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ImageGenerationsResult'
-        default:
-          description: An unexpected error response.
-          headers:
-            x-ms-error-code:
-              required: false
-              description: String error code indicating what went wrong.
-              schema:
-                type: string
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Azure.Core.Foundations.ErrorResponse'
-      requestBody:
-        required: true
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/ImageGenerationsOptions'
-  /info:
-    get:
-      operationId: getModelInfo
-      description: |-
-        Returns information about the AI model deployed.
-        The method makes a REST API call to the `/info` route on the given endpoint.
-        This method will only work when using Serverless API, Managed Compute, or Model .
-        inference endpoint. Azure OpenAI endpoints don't support i.
-      parameters:
-        - $ref: '#/components/parameters/Azure.Core.Foundations.ApiVersionParameter'
-        - name: model
-          in: query
-          required: false
-          description: The model deployment name you want information from.
-          schema:
-            type: string
-          explode: false
-      responses:
-        '200':
-          description: The request has succeeded.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ModelInfo'
-        default:
-          description: An unexpected error response.
-          headers:
-            x-ms-error-code:
-              required: false
-              description: String error code indicating what went wrong.
-              schema:
-                type: string
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Azure.Core.Foundations.ErrorResponse'
-security:
-  - ApiKeyAuth: []
-  - BearerAuth: []
-  - OAuth2Auth:
-      - https://cognitiveservices.azure.com/.default
-components:
-  parameters:
-    Azure.Core.Foundations.ApiVersionParameter:
-      name: api-version
-      in: query
-      required: true
-      description: The API version to use for this operation.
-      schema:
-        type: string
-        minLength: 1
-      explode: false
-  schemas:
-    AudioContentFormat:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - wav
-            - mp3
-      description: A representation of the possible audio formats for audio.
-    Azure.Core.Foundations.Error:
-      type: object
-      required:
-        - code
-        - message
-      properties:
-        code:
-          type: string
-          description: One of a server-defined set of error codes.
-        message:
-          type: string
-          description: A human-readable representation of the error.
-        target:
-          type: string
-          description: The target of the error.
-        details:
-          type: array
-          items:
-            $ref: '#/components/schemas/Azure.Core.Foundations.Error'
-          description: An array of details about specific errors that led to this reported error.
-        innererror:
-          allOf:
-            - $ref: '#/components/schemas/Azure.Core.Foundations.InnerError'
-          description: An object containing more specific information than the current object about the error.
-      description: The error object.
-    Azure.Core.Foundations.ErrorResponse:
-      type: object
-      required:
-        - error
-      properties:
-        error:
-          allOf:
-            - $ref: '#/components/schemas/Azure.Core.Foundations.Error'
-          description: The error object.
-      description: A response containing error details.
-    Azure.Core.Foundations.InnerError:
-      type: object
-      properties:
-        code:
-          type: string
-          description: One of a server-defined set of error codes.
-        innererror:
-          allOf:
-            - $ref: '#/components/schemas/Azure.Core.Foundations.InnerError'
-          description: Inner error.
-      description: An object containing more specific information about the error. As per Microsoft One API guidelines - https://github.com/microsoft/api-guidelines/blob/vNext/azure/Guidelines.md#handling-errors.
-    ChatChoice:
-      type: object
-      required:
-        - index
-        - finish_reason
-        - message
-      properties:
-        index:
-          type: integer
-          format: int32
-          description: The ordered index associated with this chat completions choice.
-        finish_reason:
-          allOf:
-            - $ref: '#/components/schemas/CompletionsFinishReason'
-          nullable: true
-          description: The reason that this chat completions choice completed its generated.
-          readOnly: true
-        message:
-          allOf:
-            - $ref: '#/components/schemas/ChatResponseMessage'
-          description: The chat message for a given chat completions prompt.
-          readOnly: true
-      description: |-
-        The representation of a single prompt completion as part of an overall chat completions request.
-        Generally, `n` choices are generated per provided prompt with a default value of 1.
-        Token limits and other settings may limit the number of choices generated.
-    ChatCompletions:
-      type: object
-      required:
-        - id
-        - object
-        - created
-        - model
-        - choices
-        - usage
-      properties:
-        id:
-          type: string
-          description: A unique identifier associated with this chat completions response.
-        object:
-          type: string
-          enum:
-            - chat.completion
-          description: The response object type, which is always `chat.completion`.
-        created:
-          type: integer
-          format: unixtime
-          description: |-
-            The first timestamp associated with generation activity for this completions response,
-            represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
-          readOnly: true
-        model:
-          type: string
-          description: The model used for the chat completion.
-          readOnly: true
-        choices:
-          type: array
-          items:
-            $ref: '#/components/schemas/ChatChoice'
-          minItems: 1
-          description: |-
-            The collection of completions choices associated with this completions response.
-            Generally, `n` choices are generated per provided prompt with a default value of 1.
-            Token limits and other settings may limit the number of choices generated.
-          readOnly: true
-        usage:
-          allOf:
-            - $ref: '#/components/schemas/CompletionsUsage'
-          description: '  Usage information for tokens processed and generated as part of this completions operation.'
-          readOnly: true
-      description: |-
-        Representation of the response data from a chat completions request.
-        Completions support a wide variety of tasks and generate text that continues from or "completes"
-        provided prompt data.
-    ChatCompletionsAudio:
-      type: object
-      required:
-        - id
-        - expires_at
-        - data
-        - transcript
-      properties:
-        id:
-          type: string
-          description: |2-
-              Unique identifier for the audio response. This value can be used in chat history messages instead of passing 
-              the full audio object.
-          readOnly: true
-        expires_at:
-          type: integer
-          format: unixtime
-          description: |-
-            The Unix timestamp (in seconds) at which the audio piece expires and can no longer be referenced by its ID in
-            multi-turn conversations.
-          readOnly: true
-        data:
-          type: string
-          description: Base64 encoded audio data
-          readOnly: true
-        format:
-          allOf:
-            - $ref: '#/components/schemas/AudioContentFormat'
-          description: |-
-            The format of the audio content. If format is not provided, it will match the format used in the
-            input audio request.
-          readOnly: true
-        transcript:
-          type: string
-          description: The transcript of the audio file.
-          readOnly: true
-      description: A representation of the audio generated by the model.
-    ChatCompletionsModality:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - text
-            - audio
-      description: The modalities that the model is allowed to use for the chat completions response.
-    ChatCompletionsNamedToolChoice:
-      type: object
-      required:
-        - type
-        - function
-      properties:
-        type:
-          type: string
-          enum:
-            - function
-          description: The type of the tool. Currently, only `function` is supported.
-        function:
-          allOf:
-            - $ref: '#/components/schemas/ChatCompletionsNamedToolChoiceFunction'
-          description: The function that should be called.
-      description: A tool selection of a specific, named function tool that will limit chat completions to using the named function.
-    ChatCompletionsNamedToolChoiceFunction:
-      type: object
-      required:
-        - name
-      properties:
-        name:
-          type: string
-          description: The name of the function that should be called.
-      description: A tool selection of a specific, named function tool that will limit chat completions to using the named function.
-    ChatCompletionsOptions:
-      type: object
-      required:
-        - messages
-      properties:
-        messages:
-          type: array
-          items:
-            $ref: '#/components/schemas/ChatRequestMessage'
-          minItems: 1
-          description: |-
-            The collection of context messages associated with this chat completions request.
-            Typical usage begins with a chat message for the System role that provides instructions for
-            the behavior of the assistant, followed by alternating messages between the User and
-            Assistant roles.
-        frequency_penalty:
-          type: number
-          format: float
-          minimum: -2
-          maximum: 2
-          description: |-
-            A value that influences the probability of generated tokens appearing based on their cumulative
-            frequency in generated text.
-            Positive values will make tokens less likely to appear as their frequency increases and
-            decrease the likelihood of the model repeating the same statements verbatim.
-            Supported range is [-2, 2].
-          default: 0
-        stream:
-          type: boolean
-          description: A value indicating whether chat completions should be streamed for this request.
-        presence_penalty:
-          type: number
-          format: float
-          minimum: -2
-          maximum: 2
-          description: |-
-            A value that influences the probability of generated tokens appearing based on their existing
-            presence in generated text.
-            Positive values will make tokens less likely to appear when they already exist and increase the
-            model's likelihood to output new topics.
-            Supported range is [-2, 2].
-          default: 0
-        temperature:
-          type: number
-          format: float
-          minimum: 0
-          maximum: 1
-          description: |-
-            The sampling temperature to use that controls the apparent creativity of generated completions.
-            Higher values will make output more random while lower values will make results more focused
-            and deterministic.
-            It is not recommended to modify temperature and top_p for the same completions request as the
-            interaction of these two settings is difficult to predict.
-            Supported range is [0, 1].
-          default: 0.7
-        top_p:
-          type: number
-          format: float
-          minimum: 0
-          maximum: 1
-          description: |-
-            An alternative to sampling with temperature called nucleus sampling. This value causes the
-            model to consider the results of tokens with the provided probability mass. As an example, a
-            value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
-            considered.
-            It is not recommended to modify temperature and top_p for the same completions request as the
-            interaction of these two settings is difficult to predict.
-            Supported range is [0, 1].
-          default: 1
-        max_tokens:
-          type: integer
-          format: int32
-          minimum: 0
-          description: The maximum number of tokens to generate.
-        response_format:
-          allOf:
-            - $ref: '#/components/schemas/ChatCompletionsResponseFormat'
-          description: |-
-            An object specifying the format that the model must output.
-
-            Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.
-
-            Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON.
-
-            **Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length.
-        stop:
-          type: array
-          items:
-            type: string
-          minItems: 1
-          description: A collection of textual sequences that will end completions generation.
-        tools:
-          type: array
-          items:
-            $ref: '#/components/schemas/ChatCompletionsToolDefinition'
-          minItems: 1
-          description: |-
-            A list of tools the model may request to call. Currently, only functions are supported as a tool. The model
-            may respond with a function call request and provide the input arguments in JSON format for that function.
-        tool_choice:
-          anyOf:
-            - $ref: '#/components/schemas/ChatCompletionsToolChoicePreset'
-            - $ref: '#/components/schemas/ChatCompletionsNamedToolChoice'
-          description: If specified, the model will configure which of the provided tools it can use for the chat completions response.
-        seed:
-          type: integer
-          format: int64
-          description: |-
-            If specified, the system will make a best effort to sample deterministically such that repeated requests with the
-            same seed and parameters should return the same result. Determinism is not guaranteed.
-        model:
-          type: string
-          description: ID of the specific AI model to use, if more than one model is available on the endpoint.
-        modalities:
-          type: array
-          items:
-            $ref: '#/components/schemas/ChatCompletionsModality'
-          description: |-
-            The modalities that the model is allowed to use for the chat completions response. The default modality
-            is `text`. Indicating an unsupported modality combination results in a 422 error.
-        user_security_context:
-          allOf:
-            - $ref: '#/components/schemas/UserSecurityContext'
-          description: |-
-            User security context contains several parameters that describe the AI application itself, and the end user that interacts with the AI application. 
-            These fields assist your security operations teams to investigate and mitigate security incidents by providing a comprehensive approach to protecting your AI applications. 
-            [Learn more](https://aka.ms/TP4AI/Documentation/EndUserContext) about protecting AI applications using Microsoft Defender for Cloud.
-      additionalProperties: {}
-      description: |-
-        The configuration information for a chat completions request.
-        Completions support a wide variety of tasks and generate text that continues from or "completes"
-        provided prompt data.
-    ChatCompletionsResponseFormat:
-      type: object
-      required:
-        - type
-      properties:
-        type:
-          type: string
-          description: The response format type to use for chat completions.
-      discriminator:
-        propertyName: type
-        mapping:
-          text: '#/components/schemas/ChatCompletionsResponseFormatText'
-          json_object: '#/components/schemas/ChatCompletionsResponseFormatJsonObject'
-          json_schema: '#/components/schemas/ChatCompletionsResponseFormatJsonSchema'
-      description: |-
-        Represents the format that the model must output. Use this to enable JSON mode instead of the default text mode.
-        Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
-        via a system or user message.
-    ChatCompletionsResponseFormatJsonObject:
-      type: object
-      required:
-        - type
-      properties:
-        type:
-          type: string
-          enum:
-            - json_object
-          description: "Response format type: always 'json_object' for this object."
-      allOf:
-        - $ref: '#/components/schemas/ChatCompletionsResponseFormat'
-      description: |-
-        A response format for Chat Completions that restricts responses to emitting valid JSON objects.
-        Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
-        via a system or user message.
-    ChatCompletionsResponseFormatJsonSchema:
-      type: object
-      required:
-        - type
-        - json_schema
-      properties:
-        type:
-          type: string
-          enum:
-            - json_schema
-          description: 'The type of response format being defined: `json_schema`'
-        json_schema:
-          allOf:
-            - $ref: '#/components/schemas/ChatCompletionsResponseFormatJsonSchemaDefinition'
-          description: The definition of the required JSON schema in the response, and associated metadata.
-      allOf:
-        - $ref: '#/components/schemas/ChatCompletionsResponseFormat'
-      description: |-
-        A response format for Chat Completions that restricts responses to emitting valid JSON objects, with a
-        JSON schema specified by the caller.
-    ChatCompletionsResponseFormatJsonSchemaDefinition:
-      type: object
-      required:
-        - name
-        - schema
-      properties:
-        name:
-          type: string
-          description: The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
-        schema:
-          type: object
-          additionalProperties: {}
-          description: The definition of the JSON schema
-        description:
-          type: string
-          description: A description of the response format, used by the AI model to determine how to generate responses in this format.
-        strict:
-          type: boolean
-          description: |-
-            Whether to enable strict schema adherence when generating the output.
-            If set to true, the model will always follow the exact schema defined in the `schema` field. Only a subset of
-            JSON Schema is supported when `strict` is `true`.
-          default: false
-      description: The definition of the required JSON schema in the response, and associated metadata.
-    ChatCompletionsResponseFormatText:
-      type: object
-      required:
-        - type
-      properties:
-        type:
-          type: string
-          enum:
-            - text
-          description: "Response format type: always 'text' for this object."
-      allOf:
-        - $ref: '#/components/schemas/ChatCompletionsResponseFormat'
-      description: A response format for Chat Completions that emits text responses. This is the default response format.
-    ChatCompletionsToolCall:
-      type: object
-      required:
-        - id
-        - type
-        - function
-      properties:
-        id:
-          type: string
-          description: The ID of the tool call.
-        type:
-          type: string
-          enum:
-            - function
-          description: The type of tool call. Currently, only `function` is supported.
-        function:
-          allOf:
-            - $ref: '#/components/schemas/FunctionCall'
-          description: The details of the function call requested by the AI model.
-      description: A function tool call requested by the AI model.
-    ChatCompletionsToolChoicePreset:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - auto
-            - none
-            - required
-      description: Represents a generic policy for how a chat completions tool may be selected.
-    ChatCompletionsToolDefinition:
-      type: object
-      required:
-        - type
-        - function
-      properties:
-        type:
-          type: string
-          enum:
-            - function
-          description: The type of the tool. Currently, only `function` is supported.
-        function:
-          allOf:
-            - $ref: '#/components/schemas/FunctionDefinition'
-          description: The function definition details for the function tool.
-      description: The definition of a chat completions tool that can call a function.
-    ChatMessageAudioContentItem:
-      type: object
-      required:
-        - type
-        - audio_url
-      properties:
-        type:
-          type: string
-          enum:
-            - audio_url
-          description: "The discriminated object type: always 'audio_url' for this type."
-        audio_url:
-          allOf:
-            - $ref: '#/components/schemas/ChatMessageAudioUrl'
-          description: An internet location, which must be accessible to the model, from which the audio may be retrieved.
-      allOf:
-        - $ref: '#/components/schemas/ChatMessageContentItem'
-      description: A structured chat content item containing an audio reference.
-    ChatMessageAudioUrl:
-      type: object
-      required:
-        - url
-      properties:
-        url:
-          type: string
-          description: The URL of the audio.
-      description: An internet location from which the model may retrieve audio.
-    ChatMessageContentItem:
-      type: object
-      required:
-        - type
-      properties:
-        type:
-          type: string
-          description: The discriminated object type.
-      discriminator:
-        propertyName: type
-        mapping:
-          text: '#/components/schemas/ChatMessageTextContentItem'
-          image_url: '#/components/schemas/ChatMessageImageContentItem'
-          audio_url: '#/components/schemas/ChatMessageAudioContentItem'
-          input_audio: '#/components/schemas/ChatMessageInputAudioContentItem'
-      description: An abstract representation of a structured content item within a chat message.
-    ChatMessageImageContentItem:
-      type: object
-      required:
-        - type
-        - image_url
-      properties:
-        type:
-          type: string
-          enum:
-            - image_url
-          description: "The discriminated object type: always 'image_url' for this type."
-        image_url:
-          allOf:
-            - $ref: '#/components/schemas/ChatMessageImageUrl'
-          description: An internet location, which must be accessible to the model, from which the image may be retrieved.
-      allOf:
-        - $ref: '#/components/schemas/ChatMessageContentItem'
-      description: A structured chat content item containing an image reference.
-    ChatMessageImageDetailLevel:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - auto
-            - low
-            - high
-      description: A representation of the possible image detail levels for image-based chat completions message content.
-    ChatMessageImageUrl:
-      type: object
-      required:
-        - url
-      properties:
-        url:
-          type: string
-          description: The URL of the image.
-        detail:
-          allOf:
-            - $ref: '#/components/schemas/ChatMessageImageDetailLevel'
-          description: |-
-            The evaluation quality setting to use, which controls relative prioritization of speed, token consumption, and
-            accuracy.
-      description: An internet location from which the model may retrieve an image.
-    ChatMessageInputAudio:
-      type: object
-      required:
-        - data
-        - format
-      properties:
-        data:
-          type: string
-          description: Base64 encoded audio data
-        format:
-          allOf:
-            - $ref: '#/components/schemas/AudioContentFormat'
-          description: The audio format of the audio content.
-      description: The details of an audio chat message content part.
-    ChatMessageInputAudioContentItem:
-      type: object
-      required:
-        - type
-        - format
-      properties:
-        type:
-          type: string
-          enum:
-            - input_audio
-          description: "The discriminated object type: always 'input_audio' for this type."
-        format:
-          allOf:
-            - $ref: '#/components/schemas/AudioContentFormat'
-          description: The audio format of the audio reference.
-      allOf:
-        - $ref: '#/components/schemas/ChatMessageContentItem'
-      description: A structured chat content item containing audio content.
-    ChatMessageTextContentItem:
-      type: object
-      required:
-        - type
-        - text
-      properties:
-        type:
-          type: string
-          enum:
-            - text
-          description: "The discriminated object type: always 'text' for this type."
-        text:
-          type: string
-          description: The content of the message.
-      allOf:
-        - $ref: '#/components/schemas/ChatMessageContentItem'
-      description: A structured chat content item containing plain text.
-    ChatRequestAssistantMessage:
-      type: object
-      required:
-        - role
-      properties:
-        role:
-          type: string
-          enum:
-            - assistant
-          description: The chat role associated with this message, which is always 'assistant' for assistant messages.
-        content:
-          type: string
-          description: The content of the message.
-        tool_calls:
-          type: array
-          items:
-            $ref: '#/components/schemas/ChatCompletionsToolCall'
-          description: |-
-            The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
-            completions request to resolve as configured.
-        audio:
-          allOf:
-            - $ref: '#/components/schemas/ChatRequestAudioReference'
-          description: 'The audio generated by a previous response in a multi-turn conversation.'
-      allOf:
-        - $ref: '#/components/schemas/ChatRequestMessage'
-      description: A request chat message representing response or action from the assistant.
-    ChatRequestAudioReference:
-      type: object
-      required:
-        - id
-      properties:
-        id:
-          type: string
-          description: 'Unique identifier for the audio response. This value corresponds to the id of a previous audio completion.'
-      description: A reference to an audio response generated by the model.
-    ChatRequestMessage:
-      type: object
-      required:
-        - role
-      properties:
-        role:
-          allOf:
-            - $ref: '#/components/schemas/ChatRole'
-          description: The chat role associated with this message.
-      discriminator:
-        propertyName: role
-        mapping:
-          system: '#/components/schemas/ChatRequestSystemMessage'
-          user: '#/components/schemas/ChatRequestUserMessage'
-          assistant: '#/components/schemas/ChatRequestAssistantMessage'
-          tool: '#/components/schemas/ChatRequestToolMessage'
-      description: An abstract representation of a chat message as provided in a request.
-    ChatRequestSystemMessage:
-      type: object
-      required:
-        - role
-        - content
-      properties:
-        role:
-          type: string
-          enum:
-            - system
-          description: The chat role associated with this message, which is always 'system' for system messages.
-        content:
-          type: string
-          description: The contents of the system message.
-      allOf:
-        - $ref: '#/components/schemas/ChatRequestMessage'
-      description: |-
-        A request chat message containing system instructions that influence how the model will generate a chat completions
-        response.
-    ChatRequestToolMessage:
-      type: object
-      required:
-        - role
-        - tool_call_id
-      properties:
-        role:
-          type: string
-          enum:
-            - tool
-          description: The chat role associated with this message, which is always 'tool' for tool messages.
-        content:
-          type: string
-          description: The content of the message.
-        tool_call_id:
-          type: string
-          description: The ID of the tool call resolved by the provided content.
-      allOf:
-        - $ref: '#/components/schemas/ChatRequestMessage'
-      description: A request chat message representing requested output from a configured tool.
-    ChatRequestUserMessage:
-      type: object
-      required:
-        - role
-        - content
-      properties:
-        role:
-          type: string
-          enum:
-            - user
-          description: The chat role associated with this message, which is always 'user' for user messages.
-        content:
-          anyOf:
-            - type: string
-            - type: array
-              items:
-                $ref: '#/components/schemas/ChatMessageContentItem'
-          description: The contents of the user message, with available input types varying by selected model.
-      allOf:
-        - $ref: '#/components/schemas/ChatRequestMessage'
-      description: A request chat message representing user input to the assistant.
-    ChatResponseMessage:
-      type: object
-      required:
-        - role
-        - content
-      properties:
-        role:
-          allOf:
-            - $ref: '#/components/schemas/ChatRole'
-          description: The chat role associated with the message.
-          readOnly: true
-        content:
-          type: string
-          nullable: true
-          description: The content of the message.
-          readOnly: true
-        reasoning_content:
-          type: string
-          description: The reasoning content the model used for generating the response
-          readOnly: true
-        tool_calls:
-          type: array
-          items:
-            $ref: '#/components/schemas/ChatCompletionsToolCall'
-          description: |-
-            The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
-            completions request to resolve as configured.
-          readOnly: true
-        audio:
-          allOf:
-            - $ref: '#/components/schemas/ChatCompletionsAudio'
-          description: 'The audio generated by the model as a response to the messages if the model is configured to generate audio.'
-          readOnly: true
-      description: A representation of a chat message as received in a response.
-    ChatRole:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - system
-            - developer
-            - user
-            - assistant
-            - tool
-      description: A description of the intended purpose of a message within a chat completions interaction.
-    CompletionsFinishReason:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - stop
-            - length
-            - content_filter
-            - tool_calls
-      description: Representation of the manner in which a completions response concluded.
-    CompletionsUsage:
-      type: object
-      required:
-        - completion_tokens
-        - prompt_tokens
-        - total_tokens
-      properties:
-        completion_tokens:
-          type: integer
-          format: int32
-          description: The number of tokens generated across all completions emissions.
-          readOnly: true
-        prompt_tokens:
-          type: integer
-          format: int32
-          description: The number of tokens in the provided prompts for the completions request.
-          readOnly: true
-        total_tokens:
-          type: integer
-          format: int32
-          description: The total number of tokens processed for the completions request and response.
-          readOnly: true
-        completion_tokens_details:
-          allOf:
-            - $ref: '#/components/schemas/CompletionsUsageDetails'
-          description: Breakdown of tokens used in a completion.
-          readOnly: true
-        prompt_tokens_details:
-          allOf:
-            - $ref: '#/components/schemas/PromptUsageDetails'
-          description: Breakdown of tokens used in the prompt/chat history.
-          readOnly: true
-      description: |-
-        Representation of the token counts processed for a completions request.
-        Counts consider all tokens across prompts, choices, choice alternates, best_of generations, and
-        other consumers.
-    CompletionsUsageDetails:
-      type: object
-      required:
-        - audio_tokens
-        - reasoning_tokens
-        - total_tokens
-      properties:
-        audio_tokens:
-          type: integer
-          format: int32
-          description: The number of tokens corresponding to audio input.
-          readOnly: true
-        reasoning_tokens:
-          type: integer
-          format: int32
-          description: The number of tokens corresponding to reasoning.
-          readOnly: true
-        total_tokens:
-          type: integer
-          format: int32
-          description: The total number of tokens processed for the completions request and response.
-          readOnly: true
-      description: A breakdown of tokens used in a completion.
-    EmbeddingEncodingFormat:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - base64
-            - binary
-            - float
-            - int8
-            - ubinary
-            - uint8
-      description: |-
-        Specifies the types of embeddings to generate. Compressed embedding types such as `uint8`, `int8`, `ubinary`, and
-        `binary` may reduce storage costs without sacrificing the integrity of the data. Returns a 422 error if the
-        model doesn't support the value or parameter. Read the model's documentation to know the values supported by
-        your model.
-    EmbeddingInputType:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - text
-            - query
-            - document
-      description: Represents the input types used for embedding search.
-    EmbeddingItem:
-      type: object
-      required:
-        - embedding
-        - index
-        - object
-      properties:
-        embedding:
-          type: array
-          items:
-            type: number
-            format: float
-          description: |-
-            List of embedding values for the input prompt. These represent a measurement of the
-            vector-based relatedness of the provided input. Or a base64 encoded string of the embedding vector.
-          readOnly: true
-        index:
-          type: integer
-          format: int32
-          description: Index of the prompt to which the EmbeddingItem corresponds.
-          readOnly: true
-        object:
-          type: string
-          enum:
-            - embedding
-          description: The object type of this embeddings item. Will always be `embedding`.
-      description: Representation of a single embeddings relatedness comparison.
-    EmbeddingsOptions:
-      type: object
-      required:
-        - input
-      properties:
-        input:
-          type: array
-          items:
-            type: string
-          description: |-
-            Input text to embed, encoded as a string or array of tokens.
-            To embed multiple inputs in a single request, pass an array
-            of strings or array of token arrays.
-        dimensions:
-          type: integer
-          format: int32
-          description: |-
-            Optional. The number of dimensions the resulting output embeddings should have.
-            Passing null causes the model to use its default value.
-            Returns a 422 error if the model doesn't support the value or parameter.
-        encoding_format:
-          allOf:
-            - $ref: '#/components/schemas/EmbeddingEncodingFormat'
-          description: Optional. The desired format for the returned embeddings.
-        input_type:
-          allOf:
-            - $ref: '#/components/schemas/EmbeddingInputType'
-          description: |-
-            Optional. The type of the input.
-            Returns a 422 error if the model doesn't support the value or parameter.
-        model:
-          type: string
-          description: ID of the specific AI model to use, if more than one model is available on the endpoint.
-      additionalProperties: {}
-      description: The configuration information for an embeddings request.
-    EmbeddingsResult:
-      type: object
-      required:
-        - id
-        - data
-        - usage
-        - object
-        - model
-      properties:
-        id:
-          type: string
-          description: Unique identifier for the embeddings result.
-          readOnly: true
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/EmbeddingItem'
-          description: Embedding values for the prompts submitted in the request.
-          readOnly: true
-        usage:
-          allOf:
-            - $ref: '#/components/schemas/EmbeddingsUsage'
-          description: Usage counts for tokens input using the embeddings API.
-          readOnly: true
-        object:
-          type: string
-          enum:
-            - list
-          description: The object type of the embeddings result. Will always be `list`.
-        model:
-          type: string
-          description: The model ID used to generate this result.
-          readOnly: true
-      description: |-
-        Representation of the response data from an embeddings request.
-        Embeddings measure the relatedness of text strings and are commonly used for search, clustering,
-        recommendations, and other similar scenarios.
-    EmbeddingsUsage:
-      type: object
-      required:
-        - prompt_tokens
-        - total_tokens
-      properties:
-        prompt_tokens:
-          type: integer
-          format: int32
-          description: Number of tokens in the request.
-          readOnly: true
-        total_tokens:
-          type: integer
-          format: int32
-          description: |-
-            Total number of tokens transacted in this request/response. Should equal the
-            number of tokens in the request.
-          readOnly: true
-      description: Measurement of the amount of tokens used in this request and response.
-    ExtraParameters:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - error
-            - drop
-            - pass-through
-      description: Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload.
-    FunctionCall:
-      type: object
-      required:
-        - name
-        - arguments
-      properties:
-        name:
-          type: string
-          description: The name of the function to call.
-          readOnly: true
-        arguments:
-          type: string
-          description: |-
-            The arguments to call the function with, as generated by the model in JSON format.
-            Note that the model does not always generate valid JSON, and may hallucinate parameters
-            not defined by your function schema. Validate the arguments in your code before calling
-            your function.
-          readOnly: true
-      description: The name and arguments of a function that should be called, as generated by the model.
-    FunctionDefinition:
-      type: object
-      required:
-        - name
-      properties:
-        name:
-          type: string
-          description: The name of the function to be called.
-        description:
-          type: string
-          description: |-
-            A description of what the function does. The model will use this description when selecting the function and
-            interpreting its parameters.
-        parameters:
-          type: object
-          additionalProperties: {}
-          description: The parameters the function accepts, described as a JSON Schema object.
-      description: The definition of a caller-specified function that chat completions may invoke in response to matching user input.
-    ImageEmbeddingInput:
-      type: object
-      required:
-        - image
-      properties:
-        image:
-          type: string
-          description: 'The input image encoded in base64 string as a data URL. Example: `data:image/{format};base64,{data}`.'
-        text:
-          type: string
-          description: |-
-            Optional. The text input to feed into the model (like DINO, CLIP).
-            Returns a 422 error if the model doesn't support the value or parameter.
-      description: Represents an image with optional text.
-    ImageEmbeddingsOptions:
-      type: object
-      required:
-        - input
-      properties:
-        input:
-          type: array
-          items:
-            $ref: '#/components/schemas/ImageEmbeddingInput'
-          description: |-
-            Input image to embed. To embed multiple inputs in a single request, pass an array.
-            The input must not exceed the max input tokens for the model.
-        dimensions:
-          type: integer
-          format: int32
-          description: |-
-            Optional. The number of dimensions the resulting output embeddings should have.
-            Passing null causes the model to use its default value.
-            Returns a 422 error if the model doesn't support the value or parameter.
-        encoding_format:
-          allOf:
-            - $ref: '#/components/schemas/EmbeddingEncodingFormat'
-          description: |-
-            Optional. The number of dimensions the resulting output embeddings should have.
-            Passing null causes the model to use its default value.
-            Returns a 422 error if the model doesn't support the value or parameter.
-        input_type:
-          allOf:
-            - $ref: '#/components/schemas/EmbeddingInputType'
-          description: |-
-            Optional. The type of the input.
-            Returns a 422 error if the model doesn't support the value or parameter.
-        model:
-          type: string
-          description: ID of the specific AI model to use, if more than one model is available on the endpoint.
-      additionalProperties: {}
-      description: The configuration information for an image embeddings request.
-    ImageGenerationItem:
-      type: object
-      required:
-        - image
-      properties:
-        image:
-          type: string
-          description: The image generated, encoded in base64.
-        seed:
-          type: integer
-          format: int64
-          description: The seed that can be used to generate the image.
-      description: Representation of a single image generation.
-    ImageGenerationOutputFormat:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - png
-            - jpg
-      description: The image generation format to use in the output.
-    ImageGenerationPreset:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - 672x1566
-            - 768x1366
-            - 836x1254
-            - 916x1145
-            - 1024x1024
-            - 1145x916
-            - 1254x836
-            - 1366x768
-            - 1566x672
-      description: The preset size of the image to generate.
-    ImageGenerationPrompt:
-      type: object
-      required:
-        - image
-      properties:
-        image:
-          type: string
-          description: |-
-            The input image encoded in base64 string as a data URL.
-            Example: `data:image/{format};base64,{data}`."
-        strength:
-          type: number
-          format: float
-          description: |-
-            The degree at which the generation process uses the image prompt as guidance. 1 indicates
-            a generation process that fully follows the input image. 0 a generation a process that doesn't
-            take into consideration the prompt image.
-          default: 1
-      description: Represents an image to use for guidance during the generation.
-    ImageGenerationQuality:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - hd
-            - standard
-      description: The quality of the image to generate.
-    ImageGenerationSize:
-      type: object
-      required:
-        - width
-        - height
-      properties:
-        width:
-          type: integer
-          format: int32
-          description: The width of the image to generate, in pixels.
-        height:
-          type: integer
-          format: int32
-          description: The height of the image to generate, in pixels.
-      description: The size of the image to generate.
-    ImageGenerationUsage:
-      type: object
-      required:
-        - image_generations
-      properties:
-        image_generations:
-          type: integer
-          format: int32
-          description: The number of image generations performed.
-      description: Usage counts for image generation API.
-    ImageGenerationsOptions:
-      type: object
-      required:
-        - prompt
-        - size
-        - 'n'
-      properties:
-        prompt:
-          type: string
-          description: |-
-            The prompt to use for the image geneartion. Read the model documentation to understand
-            which language you should use to prompt the model and get specific results.
-        negative_prompt:
-          type: string
-          description: |-
-            Optional. The prompt to use for the negative image generation. Read the model documentation
-            to understand to prompt the model and get specific results. If the model doesn't support
-            negative prompts, a 422 error is returned.
-        image_prompt:
-          allOf:
-            - $ref: '#/components/schemas/ImageGenerationPrompt'
-          description: Optional. An image to use as guidance for the image generation process.
-        size:
-          anyOf:
-            - $ref: '#/components/schemas/ImageGenerationPreset'
-            - $ref: '#/components/schemas/ImageGenerationSize'
-          description: The size of the image to generate.
-        output_format:
-          allOf:
-            - $ref: '#/components/schemas/ImageGenerationOutputFormat'
-          description: |-
-            Optional. The format in which to generate the image.
-            Returns a 422 error if the model doesn't support the value or parameter.
-          default: png
-        quality:
-          allOf:
-            - $ref: '#/components/schemas/ImageGenerationQuality'
-          description: |-
-            Optional. The quality of the image to generate.
-            Returns a 422 error if the model doesn't support the value or parameter.
-          default: standard
-        'n':
-          type: integer
-          format: int32
-          description: The number of images to generate. Defaults to 1.
-          default: 1
-        model:
-          type: string
-          description: ID of the specific AI model to use, if more than one model is available on the endpoint.
-        seed:
-          type: integer
-          format: int64
-          description: |-
-            If specified, the system will make a best effort to sample deterministically such that repeated requests with the
-            same seed and parameters should return the same result. Determinism is not guaranteed.
-      additionalProperties: {}
-      description: The configuration information for an image generation request.
-    ImageGenerationsResult:
-      type: object
-      required:
-        - id
-        - created
-        - model
-        - data
-        - usage
-        - object
-      properties:
-        id:
-          type: string
-          description: Unique identifier for the generation result.
-        created:
-          type: integer
-          format: unixtime
-          description: |-
-            The first timestamp associated with generation activity for this completions response,
-            represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
-        model:
-          type: string
-          description: The model used for the chat completion.
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/ImageGenerationItem'
-          description: Embedding values for the prompts submitted in the request.
-        usage:
-          allOf:
-            - $ref: '#/components/schemas/ImageGenerationUsage'
-          description: Usage counts for tokens input using the embeddings API.
-        object:
-          type: string
-          enum:
-            - image.generation
-          description: The object type of the image generation result. Will always be `list`.
-      description: The result of an image generation request.
-    ModelInfo:
-      type: object
-      required:
-        - model_name
-        - model_type
-        - model_provider_name
-      properties:
-        model_name:
-          type: string
-          description: 'The name of the AI model. For example: `Phi21`'
-          readOnly: true
-        model_type:
-          allOf:
-            - $ref: '#/components/schemas/ModelType'
-          description: The type of the AI model. A Unique identifier for the profile.
-          readOnly: true
-        model_provider_name:
-          type: string
-          description: 'The model provider name. For example: `Microsoft`'
-          readOnly: true
-      description: Represents some basic information about the AI model.
-    ModelType:
-      anyOf:
-        - type: string
-        - type: string
-          enum:
-            - embeddings
-            - chat-completion
-      description: The type of AI model
-    PromptUsageDetails:
-      type: object
-      required:
-        - audio_tokens
-        - cached_tokens
-      properties:
-        audio_tokens:
-          type: integer
-          format: int32
-          description: The number of tokens corresponding to audio input.
-          readOnly: true
-        cached_tokens:
-          type: integer
-          format: int32
-          description: The total number of tokens cached.
-          readOnly: true
-      description: A breakdown of tokens used in the prompt/chat history.
-    StreamingChatChoiceUpdate:
-      type: object
-      required:
-        - index
-        - finish_reason
-        - delta
-      properties:
-        index:
-          type: integer
-          format: int32
-          description: The ordered index associated with this chat completions choice.
-        finish_reason:
-          allOf:
-            - $ref: '#/components/schemas/CompletionsFinishReason'
-          nullable: true
-          description: The reason that this chat completions choice completed its generated.
-          readOnly: true
-        delta:
-          allOf:
-            - $ref: '#/components/schemas/StreamingChatResponseMessageUpdate'
-          description: An update to the chat message for a given chat completions prompt.
-          readOnly: true
-      description: |-
-        Represents an update to a single prompt completion when the service is streaming updates 
-        using Server Sent Events (SSE).
-        Generally, `n` choices are generated per provided prompt with a default value of 1.
-        Token limits and other settings may limit the number of choices generated.
-    StreamingChatCompletionsUpdate:
-      type: object
-      required:
-        - id
-        - object
-        - created
-        - model
-        - choices
-      properties:
-        id:
-          type: string
-          description: A unique identifier associated with this chat completions response.
-        object:
-          type: string
-          enum:
-            - chat.completion
-          description: The response object type, which is always `chat.completion`.
-        created:
-          type: integer
-          format: unixtime
-          description: |-
-            The first timestamp associated with generation activity for this completions response,
-            represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
-          readOnly: true
-        model:
-          type: string
-          description: The model used for the chat completion.
-          readOnly: true
-        choices:
-          type: array
-          items:
-            $ref: '#/components/schemas/StreamingChatChoiceUpdate'
-          minItems: 1
-          description: |-
-            An update to the collection of completion choices associated with this completions response.
-            Generally, `n` choices are generated per provided prompt with a default value of 1.
-            Token limits and other settings may limit the number of choices generated.
-          readOnly: true
-        usage:
-          allOf:
-            - $ref: '#/components/schemas/CompletionsUsage'
-          description: Usage information for tokens processed and generated as part of this completions operation.
-          readOnly: true
-      description: |-
-        Represents a response update to a chat completions request, when the service is streaming updates 
-        using Server Sent Events (SSE).
-        Completions support a wide variety of tasks and generate text that continues from or "completes"
-        provided prompt data.
-    StreamingChatResponseMessageUpdate:
-      type: object
-      properties:
-        role:
-          allOf:
-            - $ref: '#/components/schemas/ChatRole'
-          description: The chat role associated with the message. If present, should always be 'assistant'
-          readOnly: true
-        content:
-          type: string
-          description: The content of the message.
-          readOnly: true
-        reasoning_content:
-          type: string
-          description: The reasoning content the model used for generating the response
-          readOnly: true
-        tool_calls:
-          type: array
-          items:
-            $ref: '#/components/schemas/StreamingChatResponseToolCallUpdate'
-          description: |-
-            The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
-            completions request to resolve as configured.
-          readOnly: true
-      description: A representation of a chat message update as received in a streaming response.
-    StreamingChatResponseToolCallUpdate:
-      type: object
-      required:
-        - id
-        - function
-      properties:
-        id:
-          type: string
-          description: The ID of the tool call.
-          readOnly: true
-        function:
-          allOf:
-            - $ref: '#/components/schemas/FunctionCall'
-          description: Updates to the function call requested by the AI model.
-          readOnly: true
-      description: An update to the function tool call information requested by the AI model.
-    UserSecurityContext:
-      type: object
-      properties:
-        application_name:
-          type: string
-          maxLength: 100
-          description: The name of the application. Sensitive personal information should not be included in this field.
-        end_user_id:
-          type: string
-          minLength: 36
-          maxLength: 36
-          pattern: ^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$
-          description: This identifier is the Microsoft Entra ID (formerly Azure Active Directory) user object ID used to authenticate end-users within the generative AI application. Sensitive personal information should not be included in this field.
-        end_user_tenant_id:
-          type: string
-          minLength: 36
-          maxLength: 36
-          pattern: ^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$
-          description: The Microsoft 365 tenant ID the end user belongs to. It's required when the generative AI application is multi tenant.
-        source_ip:
-          type: string
-          minLength: 2
-          maxLength: 45
-          description: Captures the original client's IP address, accepting both IPv4 and IPv6 formats.
-      description: |-
-        User security context contains several parameters that describe the AI application itself, and the end user that interacts with the AI application. 
-        These fields assist your security operations teams to investigate and mitigate security incidents by providing a comprehensive approach to protecting your AI applications. 
-        [Learn more](https://aka.ms/TP4AI/Documentation/EndUserContext) about protecting AI applications using Microsoft Defender for Cloud.
-    Versions:
-      type: string
-      enum:
-        - 2024-05-01-preview
-        - '2025-04-01'
-        - '2025-05-01'
-        - 2025-05-15-preview
-      description: The AI.Model service versions.
-  securitySchemes:
-    ApiKeyAuth:
-      type: apiKey
-      in: header
-      name: api-key
-    BearerAuth:
-      type: http
-      scheme: Bearer
-    OAuth2Auth:
-      type: oauth2
-      flows:
-        implicit:
-          authorizationUrl: https://login.microsoftonline.com/common/oauth2/v2.0/authorize
-          scopes:
-            https://cognitiveservices.azure.com/.default: ''
-servers:
-  - url: https://{resource}.services.ai.azure.com/api/models
-    description: AI Model Inference
-    variables:
-      resource:
-        default: ''
-        description: The Azure AI Services resource name, for example 'my-resource'
diff --git a/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/openapi.json b/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/openapi.json
index 55168ef6d3b9..31bb74e0b009 100644
--- a/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/openapi.json
+++ b/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/openapi.json
@@ -13,7 +13,7 @@
     "https"
   ],
   "x-ms-parameterized-host": {
-    "hostTemplate": "https://{resource}.services.ai.azure.com/api/models",
+    "hostTemplate": "https://{resource}.services.ai.azure.com/models",
     "useSchemePrefix": false,
     "parameters": [
       {
diff --git a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetChatCompletions_AudioModality_Gen.json b/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetChatCompletions_AudioModality_Gen.json
deleted file mode 100644
index 8ecb38ed11a7..000000000000
--- a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetChatCompletions_AudioModality_Gen.json
+++ /dev/null
@@ -1,99 +0,0 @@
-{
-  "title": "Chat completion with audio content",
-  "operationId": "GetChatCompletions",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "extra-parameters": "error",
-    "body": {
-      "modalities": [
-        "text",
-        "audio"
-      ],
-      "messages": [
-        {
-          "role": "system",
-          "content": "You are a helpful assistant"
-        },
-        {
-          "role": "user",
-          "content": [
-            {
-              "type": "input_audio",
-              "input_audio": {
-                "data": "<base64 encoded audio data>",
-                "format": "wav"
-              }
-            }
-          ]
-        },
-        {
-          "role": "assistant",
-          "content": null,
-          "audio": {
-            "id": "abcdef1234"
-          }
-        },
-        {
-          "role": "user",
-          "content": [
-            {
-              "type": "input_audio",
-              "input_audio": {
-                "data": "<base64 encoded audio data>",
-                "format": "wav"
-              }
-            }
-          ]
-        }
-      ],
-      "frequency_penalty": 0,
-      "presence_penalty": 0,
-      "temperature": 0,
-      "top_p": 0,
-      "seed": 21,
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "kgousajxgzyhugvqekuswuqbk",
-        "object": "chat.completion",
-        "created": 1696522361,
-        "model": "my-model-name",
-        "usage": {
-          "completion_tokens": 19,
-          "prompt_tokens": 28,
-          "total_tokens": 16,
-          "completion_tokens_details": {
-            "audio_tokens": 5,
-            "reasoning_tokens": 0,
-            "total_tokens": 5
-          },
-          "prompt_tokens_details": {
-            "audio_tokens": 10,
-            "cached_tokens": 0
-          }
-        },
-        "choices": [
-          {
-            "index": 0,
-            "finish_reason": "stop",
-            "message": {
-              "role": "assistant",
-              "content": null,
-              "tool_calls": null,
-              "audio": {
-                "id": "abcdef1234",
-                "format": "wav",
-                "data": "<base64 encoded audio data>",
-                "expires_at": 1896522361,
-                "transcript": "This is a sample transcript"
-              }
-            }
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetChatCompletions_MaximumSet_Gen.json b/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetChatCompletions_MaximumSet_Gen.json
deleted file mode 100644
index 9f291b43f7e9..000000000000
--- a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetChatCompletions_MaximumSet_Gen.json
+++ /dev/null
@@ -1,95 +0,0 @@
-{
-  "title": "Chat completion with multiple parameters and chat history",
-  "operationId": "GetChatCompletions",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "extra-parameters": "error",
-    "body": {
-      "modalities": [
-        "text"
-      ],
-      "messages": [
-        {
-          "role": "system",
-          "content": "You are a helpful assistant"
-        },
-        {
-          "role": "user",
-          "content": "Explain Riemann's conjecture"
-        },
-        {
-          "role": "assistant",
-          "content": "The Riemann Conjecture is a deep mathematical conjecture around prime numbers and how they can be predicted. It was first published in Riemann's groundbreaking 1859 paper. The conjecture states that the Riemann zeta function has its zeros only at the negative even integers and complex numbers with real part 1/21. Many consider it to be the most important unsolved problem in pure mathematics. The Riemann hypothesis is a way to predict the probability that numbers in a certain range are prime that was also devised by German mathematician Bernhard Riemann in 18594."
-        },
-        {
-          "role": "user",
-          "content": "Ist it proved?"
-        }
-      ],
-      "frequency_penalty": 0,
-      "stream": true,
-      "presence_penalty": 0,
-      "temperature": 0,
-      "top_p": 0,
-      "max_tokens": 255,
-      "response_format": {
-        "type": "text"
-      },
-      "stop": [
-        "<|endoftext|>"
-      ],
-      "tools": [
-        {
-          "type": "function",
-          "function": {
-            "name": "my-function-name",
-            "description": "A function useful to know if a theroem is proved or not"
-          }
-        }
-      ],
-      "seed": 21,
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "kgousajxgzyhugvqekuswuqbk",
-        "object": "chat.completion",
-        "created": 18,
-        "model": "my-model-name",
-        "usage": {
-          "completion_tokens": 19,
-          "prompt_tokens": 28,
-          "total_tokens": 16,
-          "completion_tokens_details": {
-            "audio_tokens": 5,
-            "reasoning_tokens": 0,
-            "total_tokens": 5
-          }
-        },
-        "choices": [
-          {
-            "index": 7,
-            "finish_reason": "stop",
-            "message": {
-              "role": "assistant",
-              "content": null,
-              "reasoning_content": null,
-              "tool_calls": [
-                {
-                  "id": "yrobmilsrugmbwukmzo",
-                  "type": "function",
-                  "function": {
-                    "name": "my-function-name",
-                    "arguments": "{ \"arg1\": \"value1\", \"arg2\": \"value2\" }"
-                  }
-                }
-              ]
-            }
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetChatCompletions_MinimumSet_Gen.json b/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetChatCompletions_MinimumSet_Gen.json
deleted file mode 100644
index 2aac392805a5..000000000000
--- a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetChatCompletions_MinimumSet_Gen.json
+++ /dev/null
@@ -1,40 +0,0 @@
-{
-  "title": "Simple chat completion",
-  "operationId": "GetChatCompletions",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "body": {
-      "messages": [
-        {
-          "role": "user",
-          "content": "Explain Riemann's conjecture"
-        }
-      ]
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "kgousajxgzyhugvqekuswuqbk",
-        "object": "chat.completion",
-        "created": 1234567890,
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 205,
-          "completion_tokens": 5,
-          "total_tokens": 210
-        },
-        "choices": [
-          {
-            "index": 0,
-            "finish_reason": "stop",
-            "message": {
-              "role": "assistant",
-              "content": "The Riemann Conjecture is a deep mathematical conjecture around prime numbers and how they can be predicted. It was first published in Riemann's groundbreaking 1859 paper. The conjecture states that the Riemann zeta function has its zeros only at the negative even integers and complex numbers with real part 1/21. Many consider it to be the most important unsolved problem in pure mathematics. The Riemann hypothesis is a way to predict the probability that numbers in a certain range are prime that was also devised by German mathematician Bernhard Riemann in 18594"
-            }
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetEmbeddings_MaximumSet_Gen.json b/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetEmbeddings_MaximumSet_Gen.json
deleted file mode 100644
index 13f078da618f..000000000000
--- a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetEmbeddings_MaximumSet_Gen.json
+++ /dev/null
@@ -1,50 +0,0 @@
-{
-  "title": "Create text embeddings with dimension, encoding, and input type",
-  "operationId": "GetEmbeddings",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "extra-parameters": "error",
-    "body": {
-      "input": [
-        "This is a very good text"
-      ],
-      "dimensions": 1024,
-      "encoding_format": "float",
-      "input_type": "text",
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetEmbeddings_MinimumSet_Gen.json b/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetEmbeddings_MinimumSet_Gen.json
deleted file mode 100644
index ebb6fc2511ee..000000000000
--- a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetEmbeddings_MinimumSet_Gen.json
+++ /dev/null
@@ -1,45 +0,0 @@
-{
-  "title": "Create text embeddings",
-  "operationId": "GetEmbeddings",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "body": {
-      "input": [
-        "This is a very good text"
-      ]
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetImageEmbeddings_MaximumSet_Gen.json b/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetImageEmbeddings_MaximumSet_Gen.json
deleted file mode 100644
index bd35a7d22f0c..000000000000
--- a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetImageEmbeddings_MaximumSet_Gen.json
+++ /dev/null
@@ -1,53 +0,0 @@
-{
-  "title": "maximum set image embeddings",
-  "operationId": "GetImageEmbeddings",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "extra-parameters": "error",
-    "body": {
-      "input": [
-        {
-          "image": "puqkvvlvgcjyzughesnkena",
-          "text": "azrzyjsmnuefqpowpvfmyobeehqsni"
-        }
-      ],
-      "dimensions": 1024,
-      "encoding_format": "float",
-      "input_type": "text",
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetImageEmbeddings_MinimumSet_Gen.json b/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetImageEmbeddings_MinimumSet_Gen.json
deleted file mode 100644
index f8677ebc8a31..000000000000
--- a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetImageEmbeddings_MinimumSet_Gen.json
+++ /dev/null
@@ -1,47 +0,0 @@
-{
-  "title": "Generate image embeddings",
-  "operationId": "GetImageEmbeddings",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "body": {
-      "input": [
-        {
-          "image": "gvmojtfooxixxzayrditjlyymg"
-        }
-      ]
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetImageGenerations_MaximumSet_Gen.json b/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetImageGenerations_MaximumSet_Gen.json
deleted file mode 100644
index 3e04eec83312..000000000000
--- a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetImageGenerations_MaximumSet_Gen.json
+++ /dev/null
@@ -1,41 +0,0 @@
-{
-  "title": "Generate an image based on a prompt, a negative prompt, and an image",
-  "operationId": "GetImageGenerations",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "extra-parameters": "error",
-    "body": {
-      "prompt": "An image of a cat",
-      "negative_prompt": "a dog",
-      "image_prompt": {
-        "image": "<base64 encoded image data>",
-        "strength": 0.7
-      },
-      "size": "1024x1024",
-      "output_format": "png",
-      "quality": "standard",
-      "n": 1,
-      "model": "Stable-Image-Ultra",
-      "seed": 42
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "created": 18,
-        "data": [
-          {
-            "image": "<base64 encoded image data>",
-            "seed": 42
-          }
-        ],
-        "object": "image.generation",
-        "model": "Stable-Image-Ultra",
-        "usage": {
-          "image_generations": 1
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetImageGenerations_MinimumSet_Gen.json b/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetImageGenerations_MinimumSet_Gen.json
deleted file mode 100644
index 2045052b5223..000000000000
--- a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetImageGenerations_MinimumSet_Gen.json
+++ /dev/null
@@ -1,32 +0,0 @@
-{
-  "title": "Generate a simple image based on a prompt",
-  "operationId": "GetImageGenerations",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "body": {
-      "prompt": "An image of a cat",
-      "size": "1024x1024",
-      "model": "Stable-Image-Ultra",
-      "n": 1
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "created": 18,
-        "data": [
-          {
-            "image": "<base64 encoded image data>",
-            "seed": 42
-          }
-        ],
-        "object": "image.generation",
-        "model": "Stable-Image-Ultra",
-        "usage": {
-          "image_generations": 1
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetModelInfo_MaximumSet_Gen.json b/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetModelInfo_MaximumSet_Gen.json
deleted file mode 100644
index 534a51a834bd..000000000000
--- a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetModelInfo_MaximumSet_Gen.json
+++ /dev/null
@@ -1,17 +0,0 @@
-{
-  "title": "Get model information for a model deployment",
-  "operationId": "GetModelInfo",
-  "parameters": {
-    "api-version": "2025-05-15-preview",
-    "model": "Phi-3.5-mini-instruct"
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "model_name": "Phi-3.5-mini-instruct",
-        "model_type": "chat-completion",
-        "model_provider_name": "Microsoft"
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetModelInfo_MinimumSet_Gen.json b/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetModelInfo_MinimumSet_Gen.json
deleted file mode 100644
index ad29d906732a..000000000000
--- a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/examples/GetModelInfo_MinimumSet_Gen.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{
-  "title": "Get model information",
-  "operationId": "GetModelInfo",
-  "parameters": {
-    "api-version": "2025-05-15-preview"
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "model_name": "Phi-3.5-mini-instruct",
-        "model_type": "chat-completion",
-        "model_provider_name": "Microsoft"
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/openapi.json b/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/openapi.json
deleted file mode 100644
index 8de6377ef4e1..000000000000
--- a/specification/ai/data-plane/ModelInference/preview/2025-05-15-preview/openapi.json
+++ /dev/null
@@ -1,2360 +0,0 @@
-{
-  "swagger": "2.0",
-  "info": {
-    "title": "AI Model Inference",
-    "version": "2025-05-15-preview",
-    "x-typespec-generated": [
-      {
-        "emitter": "@azure-tools/typespec-autorest"
-      }
-    ]
-  },
-  "schemes": [
-    "https"
-  ],
-  "x-ms-parameterized-host": {
-    "hostTemplate": "https://{resource}.services.ai.azure.com/api/models",
-    "useSchemePrefix": false,
-    "parameters": [
-      {
-        "name": "resource",
-        "in": "path",
-        "description": "The Azure AI Services resource name, for example 'my-resource'",
-        "required": true,
-        "type": "string"
-      }
-    ]
-  },
-  "produces": [
-    "application/json"
-  ],
-  "consumes": [
-    "application/json"
-  ],
-  "security": [
-    {
-      "ApiKeyAuth": []
-    },
-    {
-      "OAuth2Auth": [
-        "https://cognitiveservices.azure.com/.default"
-      ]
-    }
-  ],
-  "securityDefinitions": {
-    "ApiKeyAuth": {
-      "type": "apiKey",
-      "name": "api-key",
-      "in": "header"
-    },
-    "OAuth2Auth": {
-      "type": "oauth2",
-      "flow": "implicit",
-      "authorizationUrl": "https://login.microsoftonline.com/common/oauth2/v2.0/authorize",
-      "scopes": {
-        "https://cognitiveservices.azure.com/.default": ""
-      }
-    }
-  },
-  "tags": [],
-  "paths": {
-    "/chat/completions": {
-      "post": {
-        "operationId": "GetChatCompletions",
-        "description": "Gets chat completions for the provided chat messages.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data. The method makes a REST API call to the `/chat/completions` route\non the given endpoint.",
-        "parameters": [
-          {
-            "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter"
-          },
-          {
-            "name": "extra-parameters",
-            "in": "header",
-            "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.",
-            "required": false,
-            "type": "string",
-            "enum": [
-              "error",
-              "drop",
-              "pass-through"
-            ],
-            "x-ms-enum": {
-              "name": "ExtraParameters",
-              "modelAsString": true,
-              "values": [
-                {
-                  "name": "error",
-                  "value": "error",
-                  "description": "The service will error if it detected extra parameters in the request payload. This is the service default."
-                },
-                {
-                  "name": "drop",
-                  "value": "drop",
-                  "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model."
-                },
-                {
-                  "name": "pass_through",
-                  "value": "pass-through",
-                  "description": "The service will pass extra parameters to the back-end AI model."
-                }
-              ]
-            },
-            "x-ms-client-name": "extra_params"
-          },
-          {
-            "name": "body",
-            "in": "body",
-            "description": "The parameters of the chat completions request.",
-            "required": true,
-            "schema": {
-              "$ref": "#/definitions/ChatCompletionsOptions"
-            }
-          }
-        ],
-        "responses": {
-          "200": {
-            "description": "The request has succeeded.",
-            "schema": {
-              "$ref": "#/definitions/ChatCompletions"
-            }
-          },
-          "default": {
-            "description": "An unexpected error response.",
-            "schema": {
-              "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse"
-            },
-            "headers": {
-              "x-ms-error-code": {
-                "type": "string",
-                "description": "String error code indicating what went wrong."
-              }
-            }
-          }
-        },
-        "x-ms-examples": {
-          "Chat completion with audio content": {
-            "$ref": "./examples/GetChatCompletions_AudioModality_Gen.json"
-          },
-          "Chat completion with multiple parameters and chat history": {
-            "$ref": "./examples/GetChatCompletions_MaximumSet_Gen.json"
-          },
-          "Simple chat completion": {
-            "$ref": "./examples/GetChatCompletions_MinimumSet_Gen.json"
-          }
-        }
-      }
-    },
-    "/embeddings": {
-      "post": {
-        "operationId": "GetEmbeddings",
-        "description": "Return the embedding vectors for given text prompts.\nThe method makes a REST API call to the `/embeddings` route on the given endpoint.",
-        "parameters": [
-          {
-            "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter"
-          },
-          {
-            "name": "extra-parameters",
-            "in": "header",
-            "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.",
-            "required": false,
-            "type": "string",
-            "enum": [
-              "error",
-              "drop",
-              "pass-through"
-            ],
-            "x-ms-enum": {
-              "name": "ExtraParameters",
-              "modelAsString": true,
-              "values": [
-                {
-                  "name": "error",
-                  "value": "error",
-                  "description": "The service will error if it detected extra parameters in the request payload. This is the service default."
-                },
-                {
-                  "name": "drop",
-                  "value": "drop",
-                  "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model."
-                },
-                {
-                  "name": "pass_through",
-                  "value": "pass-through",
-                  "description": "The service will pass extra parameters to the back-end AI model."
-                }
-              ]
-            },
-            "x-ms-client-name": "extra_params"
-          },
-          {
-            "name": "body",
-            "in": "body",
-            "description": "The parameters of the embeddings request.",
-            "required": true,
-            "schema": {
-              "$ref": "#/definitions/EmbeddingsOptions"
-            }
-          }
-        ],
-        "responses": {
-          "200": {
-            "description": "The request has succeeded.",
-            "schema": {
-              "$ref": "#/definitions/EmbeddingsResult"
-            }
-          },
-          "default": {
-            "description": "An unexpected error response.",
-            "schema": {
-              "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse"
-            },
-            "headers": {
-              "x-ms-error-code": {
-                "type": "string",
-                "description": "String error code indicating what went wrong."
-              }
-            }
-          }
-        },
-        "x-ms-examples": {
-          "Create text embeddings": {
-            "$ref": "./examples/GetEmbeddings_MinimumSet_Gen.json"
-          },
-          "Create text embeddings with dimension, encoding, and input type": {
-            "$ref": "./examples/GetEmbeddings_MaximumSet_Gen.json"
-          }
-        }
-      }
-    },
-    "/images/embeddings": {
-      "post": {
-        "operationId": "GetImageEmbeddings",
-        "description": "Return the embedding vectors for given images.\nThe method makes a REST API call to the `/images/embeddings` route on the given endpoint.",
-        "parameters": [
-          {
-            "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter"
-          },
-          {
-            "name": "extra-parameters",
-            "in": "header",
-            "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.",
-            "required": false,
-            "type": "string",
-            "enum": [
-              "error",
-              "drop",
-              "pass-through"
-            ],
-            "x-ms-enum": {
-              "name": "ExtraParameters",
-              "modelAsString": true,
-              "values": [
-                {
-                  "name": "error",
-                  "value": "error",
-                  "description": "The service will error if it detected extra parameters in the request payload. This is the service default."
-                },
-                {
-                  "name": "drop",
-                  "value": "drop",
-                  "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model."
-                },
-                {
-                  "name": "pass_through",
-                  "value": "pass-through",
-                  "description": "The service will pass extra parameters to the back-end AI model."
-                }
-              ]
-            },
-            "x-ms-client-name": "extra_params"
-          },
-          {
-            "name": "body",
-            "in": "body",
-            "description": "The parameters of the image embeddings request.",
-            "required": true,
-            "schema": {
-              "$ref": "#/definitions/ImageEmbeddingsOptions"
-            }
-          }
-        ],
-        "responses": {
-          "200": {
-            "description": "The request has succeeded.",
-            "schema": {
-              "$ref": "#/definitions/EmbeddingsResult"
-            }
-          },
-          "default": {
-            "description": "An unexpected error response.",
-            "schema": {
-              "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse"
-            },
-            "headers": {
-              "x-ms-error-code": {
-                "type": "string",
-                "description": "String error code indicating what went wrong."
-              }
-            }
-          }
-        },
-        "x-ms-examples": {
-          "Generate image embeddings": {
-            "$ref": "./examples/GetImageEmbeddings_MinimumSet_Gen.json"
-          },
-          "maximum set image embeddings": {
-            "$ref": "./examples/GetImageEmbeddings_MaximumSet_Gen.json"
-          }
-        }
-      }
-    },
-    "/images/generations": {
-      "post": {
-        "operationId": "GetImageGenerations",
-        "description": "Generates an image based on a text or image prompt.\nThe method makes a REST API call to the `/images/generations` route on the given endpoint.",
-        "parameters": [
-          {
-            "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter"
-          },
-          {
-            "name": "extra-parameters",
-            "in": "header",
-            "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.",
-            "required": false,
-            "type": "string",
-            "enum": [
-              "error",
-              "drop",
-              "pass-through"
-            ],
-            "x-ms-enum": {
-              "name": "ExtraParameters",
-              "modelAsString": true,
-              "values": [
-                {
-                  "name": "error",
-                  "value": "error",
-                  "description": "The service will error if it detected extra parameters in the request payload. This is the service default."
-                },
-                {
-                  "name": "drop",
-                  "value": "drop",
-                  "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model."
-                },
-                {
-                  "name": "pass_through",
-                  "value": "pass-through",
-                  "description": "The service will pass extra parameters to the back-end AI model."
-                }
-              ]
-            },
-            "x-ms-client-name": "extra_params"
-          },
-          {
-            "name": "body",
-            "in": "body",
-            "required": true,
-            "schema": {
-              "$ref": "#/definitions/ImageGenerationsOptions"
-            }
-          }
-        ],
-        "responses": {
-          "200": {
-            "description": "The request has succeeded.",
-            "schema": {
-              "$ref": "#/definitions/ImageGenerationsResult"
-            }
-          },
-          "default": {
-            "description": "An unexpected error response.",
-            "schema": {
-              "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse"
-            },
-            "headers": {
-              "x-ms-error-code": {
-                "type": "string",
-                "description": "String error code indicating what went wrong."
-              }
-            }
-          }
-        },
-        "x-ms-examples": {
-          "Generate a simple image based on a prompt": {
-            "$ref": "./examples/GetImageGenerations_MinimumSet_Gen.json"
-          },
-          "Generate an image based on a prompt, a negative prompt, and an image": {
-            "$ref": "./examples/GetImageGenerations_MaximumSet_Gen.json"
-          }
-        }
-      }
-    },
-    "/info": {
-      "get": {
-        "operationId": "GetModelInfo",
-        "description": "Returns information about the AI model deployed.\nThe method makes a REST API call to the `/info` route on the given endpoint.\nThis method will only work when using Serverless API, Managed Compute, or Model .\ninference endpoint. Azure OpenAI endpoints don't support i.",
-        "parameters": [
-          {
-            "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter"
-          },
-          {
-            "name": "model",
-            "in": "query",
-            "description": "The model deployment name you want information from.",
-            "required": false,
-            "type": "string"
-          }
-        ],
-        "responses": {
-          "200": {
-            "description": "The request has succeeded.",
-            "schema": {
-              "$ref": "#/definitions/ModelInfo"
-            }
-          },
-          "default": {
-            "description": "An unexpected error response.",
-            "schema": {
-              "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse"
-            },
-            "headers": {
-              "x-ms-error-code": {
-                "type": "string",
-                "description": "String error code indicating what went wrong."
-              }
-            }
-          }
-        },
-        "x-ms-examples": {
-          "Get model information": {
-            "$ref": "./examples/GetModelInfo_MinimumSet_Gen.json"
-          },
-          "Get model information for a model deployment": {
-            "$ref": "./examples/GetModelInfo_MaximumSet_Gen.json"
-          }
-        }
-      }
-    }
-  },
-  "definitions": {
-    "AudioContentFormat": {
-      "type": "string",
-      "description": "A representation of the possible audio formats for audio.",
-      "enum": [
-        "wav",
-        "mp3"
-      ],
-      "x-ms-enum": {
-        "name": "AudioContentFormat",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "wav",
-            "value": "wav",
-            "description": "Specifies audio in WAV format."
-          },
-          {
-            "name": "mp3",
-            "value": "mp3",
-            "description": "Specifies audio in MP3 format."
-          }
-        ]
-      }
-    },
-    "Azure.Core.Foundations.Error": {
-      "type": "object",
-      "description": "The error object.",
-      "properties": {
-        "code": {
-          "type": "string",
-          "description": "One of a server-defined set of error codes."
-        },
-        "message": {
-          "type": "string",
-          "description": "A human-readable representation of the error."
-        },
-        "target": {
-          "type": "string",
-          "description": "The target of the error."
-        },
-        "details": {
-          "type": "array",
-          "description": "An array of details about specific errors that led to this reported error.",
-          "items": {
-            "$ref": "#/definitions/Azure.Core.Foundations.Error"
-          },
-          "x-ms-identifiers": []
-        },
-        "innererror": {
-          "$ref": "#/definitions/Azure.Core.Foundations.InnerError",
-          "description": "An object containing more specific information than the current object about the error."
-        }
-      },
-      "required": [
-        "code",
-        "message"
-      ]
-    },
-    "Azure.Core.Foundations.ErrorResponse": {
-      "type": "object",
-      "description": "A response containing error details.",
-      "properties": {
-        "error": {
-          "$ref": "#/definitions/Azure.Core.Foundations.Error",
-          "description": "The error object."
-        }
-      },
-      "required": [
-        "error"
-      ]
-    },
-    "Azure.Core.Foundations.InnerError": {
-      "type": "object",
-      "description": "An object containing more specific information about the error. As per Microsoft One API guidelines - https://github.com/microsoft/api-guidelines/blob/vNext/azure/Guidelines.md#handling-errors.",
-      "properties": {
-        "code": {
-          "type": "string",
-          "description": "One of a server-defined set of error codes."
-        },
-        "innererror": {
-          "$ref": "#/definitions/Azure.Core.Foundations.InnerError",
-          "description": "Inner error."
-        }
-      }
-    },
-    "ChatChoice": {
-      "type": "object",
-      "description": "The representation of a single prompt completion as part of an overall chat completions request.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.",
-      "properties": {
-        "index": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The ordered index associated with this chat completions choice."
-        },
-        "finish_reason": {
-          "$ref": "#/definitions/CompletionsFinishReason",
-          "description": "The reason that this chat completions choice completed its generated.",
-          "x-nullable": true,
-          "readOnly": true
-        },
-        "message": {
-          "$ref": "#/definitions/ChatResponseMessage",
-          "description": "The chat message for a given chat completions prompt.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "index",
-        "finish_reason",
-        "message"
-      ]
-    },
-    "ChatCompletions": {
-      "type": "object",
-      "description": "Representation of the response data from a chat completions request.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "A unique identifier associated with this chat completions response."
-        },
-        "object": {
-          "type": "string",
-          "description": "The response object type, which is always `chat.completion`.",
-          "enum": [
-            "chat.completion"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        },
-        "created": {
-          "type": "integer",
-          "format": "unixtime",
-          "description": "The first timestamp associated with generation activity for this completions response,\nrepresented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.",
-          "readOnly": true
-        },
-        "model": {
-          "type": "string",
-          "description": "The model used for the chat completion.",
-          "readOnly": true
-        },
-        "choices": {
-          "type": "array",
-          "description": "The collection of completions choices associated with this completions response.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.",
-          "minItems": 1,
-          "items": {
-            "$ref": "#/definitions/ChatChoice"
-          },
-          "readOnly": true,
-          "x-ms-identifiers": []
-        },
-        "usage": {
-          "$ref": "#/definitions/CompletionsUsage",
-          "description": "  Usage information for tokens processed and generated as part of this completions operation.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "id",
-        "object",
-        "created",
-        "model",
-        "choices",
-        "usage"
-      ]
-    },
-    "ChatCompletionsAudio": {
-      "type": "object",
-      "description": "A representation of the audio generated by the model.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "  Unique identifier for the audio response. This value can be used in chat history messages instead of passing \n  the full audio object.",
-          "readOnly": true
-        },
-        "expires_at": {
-          "type": "integer",
-          "format": "unixtime",
-          "description": "The Unix timestamp (in seconds) at which the audio piece expires and can't be any longer referenced by its ID in \nmulti-turn conversations.",
-          "readOnly": true,
-          "x-ms-client-name": "expiresAt"
-        },
-        "data": {
-          "type": "string",
-          "description": "Base64 encoded audio data",
-          "readOnly": true
-        },
-        "format": {
-          "$ref": "#/definitions/AudioContentFormat",
-          "description": "The format of the audio content. If format is not provided, it will match the format used in the\ninput audio request.",
-          "readOnly": true
-        },
-        "transcript": {
-          "type": "string",
-          "description": "The transcript of the audio file.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "id",
-        "expires_at",
-        "data",
-        "transcript"
-      ]
-    },
-    "ChatCompletionsModality": {
-      "type": "string",
-      "description": "The modalities that the model is allowed to use for the chat completions response.",
-      "enum": [
-        "text",
-        "audio"
-      ],
-      "x-ms-enum": {
-        "name": "ChatCompletionsModality",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "text",
-            "value": "text",
-            "description": "The model is only allowed to generate text."
-          },
-          {
-            "name": "audio",
-            "value": "audio",
-            "description": "The model is allowed to generate audio."
-          }
-        ]
-      }
-    },
-    "ChatCompletionsNamedToolChoice": {
-      "type": "object",
-      "description": "A tool selection of a specific, named function tool that will limit chat completions to using the named function.",
-      "properties": {
-        "type": {
-          "type": "string",
-          "description": "The type of the tool. Currently, only `function` is supported.",
-          "enum": [
-            "function"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        },
-        "function": {
-          "$ref": "#/definitions/ChatCompletionsNamedToolChoiceFunction",
-          "description": "The function that should be called."
-        }
-      },
-      "required": [
-        "type",
-        "function"
-      ]
-    },
-    "ChatCompletionsNamedToolChoiceFunction": {
-      "type": "object",
-      "description": "A tool selection of a specific, named function tool that will limit chat completions to using the named function.",
-      "properties": {
-        "name": {
-          "type": "string",
-          "description": "The name of the function that should be called."
-        }
-      },
-      "required": [
-        "name"
-      ]
-    },
-    "ChatCompletionsOptions": {
-      "type": "object",
-      "description": "The configuration information for a chat completions request.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.",
-      "properties": {
-        "messages": {
-          "type": "array",
-          "description": "The collection of context messages associated with this chat completions request.\nTypical usage begins with a chat message for the System role that provides instructions for\nthe behavior of the assistant, followed by alternating messages between the User and\nAssistant roles.",
-          "minItems": 1,
-          "items": {
-            "$ref": "#/definitions/ChatRequestMessage"
-          },
-          "x-ms-identifiers": []
-        },
-        "frequency_penalty": {
-          "type": "number",
-          "format": "float",
-          "description": "A value that influences the probability of generated tokens appearing based on their cumulative\nfrequency in generated text.\nPositive values will make tokens less likely to appear as their frequency increases and\ndecrease the likelihood of the model repeating the same statements verbatim.\nSupported range is [-2, 2].",
-          "default": 0,
-          "minimum": -2,
-          "maximum": 2
-        },
-        "stream": {
-          "type": "boolean",
-          "description": "A value indicating whether chat completions should be streamed for this request."
-        },
-        "presence_penalty": {
-          "type": "number",
-          "format": "float",
-          "description": "A value that influences the probability of generated tokens appearing based on their existing\npresence in generated text.\nPositive values will make tokens less likely to appear when they already exist and increase the\nmodel's likelihood to output new topics.\nSupported range is [-2, 2].",
-          "default": 0,
-          "minimum": -2,
-          "maximum": 2
-        },
-        "temperature": {
-          "type": "number",
-          "format": "float",
-          "description": "The sampling temperature to use that controls the apparent creativity of generated completions.\nHigher values will make output more random while lower values will make results more focused\nand deterministic.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].",
-          "default": 0.7,
-          "minimum": 0,
-          "maximum": 1
-        },
-        "top_p": {
-          "type": "number",
-          "format": "float",
-          "description": "An alternative to sampling with temperature called nucleus sampling. This value causes the\nmodel to consider the results of tokens with the provided probability mass. As an example, a\nvalue of 0.15 will cause only the tokens comprising the top 15% of probability mass to be\nconsidered.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].",
-          "default": 1,
-          "minimum": 0,
-          "maximum": 1
-        },
-        "max_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The maximum number of tokens to generate.",
-          "minimum": 0
-        },
-        "response_format": {
-          "$ref": "#/definitions/ChatCompletionsResponseFormat",
-          "description": "An object specifying the format that the model must output.\n\nSetting to `{ \"type\": \"json_schema\", \"json_schema\": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.\n\nSetting to `{ \"type\": \"json_object\" }` enables JSON mode, which ensures the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly \"stuck\" request. Also note that the message content may be partially cut off if `finish_reason=\"length\"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length."
-        },
-        "stop": {
-          "type": "array",
-          "description": "A collection of textual sequences that will end completions generation.",
-          "minItems": 1,
-          "items": {
-            "type": "string"
-          }
-        },
-        "tools": {
-          "type": "array",
-          "description": "A list of tools the model may request to call. Currently, only functions are supported as a tool. The model\nmay respond with a function call request and provide the input arguments in JSON format for that function.",
-          "minItems": 1,
-          "items": {
-            "$ref": "#/definitions/ChatCompletionsToolDefinition"
-          },
-          "x-ms-identifiers": []
-        },
-        "tool_choice": {
-          "description": "If specified, the model will configure which of the provided tools it can use for the chat completions response.",
-          "x-ms-client-name": "toolChoice"
-        },
-        "seed": {
-          "type": "integer",
-          "format": "int64",
-          "description": "If specified, the system will make a best effort to sample deterministically such that repeated requests with the\nsame seed and parameters should return the same result. Determinism is not guaranteed."
-        },
-        "model": {
-          "type": "string",
-          "description": "ID of the specific AI model to use, if more than one model is available on the endpoint."
-        },
-        "modalities": {
-          "type": "array",
-          "description": "The modalities that the model is allowed to use for the chat completions response. The default modality\nis `text`. Indicating an unsupported modality combination results in a 422 error.",
-          "items": {
-            "$ref": "#/definitions/ChatCompletionsModality"
-          }
-        },
-        "user_security_context": {
-          "$ref": "#/definitions/UserSecurityContext",
-          "description": "User security context contains several parameters that describe the AI application itself, and the end user that interacts with the AI application. \nThese fields assist your security operations teams to investigate and mitigate security incidents by providing a comprehensive approach to protecting your AI applications. \n[Learn more](https://aka.ms/TP4AI/Documentation/EndUserContext) about protecting AI applications using Microsoft Defender for Cloud."
-        }
-      },
-      "required": [
-        "messages"
-      ],
-      "additionalProperties": {}
-    },
-    "ChatCompletionsResponseFormat": {
-      "type": "object",
-      "description": "Represents the format that the model must output. Use this to enable JSON mode instead of the default text mode.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message.",
-      "properties": {
-        "type": {
-          "type": "string",
-          "description": "The response format type to use for chat completions."
-        }
-      },
-      "discriminator": "type",
-      "required": [
-        "type"
-      ]
-    },
-    "ChatCompletionsResponseFormatJsonObject": {
-      "type": "object",
-      "description": "A response format for Chat Completions that restricts responses to emitting valid JSON objects.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message.",
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatCompletionsResponseFormat"
-        }
-      ],
-      "x-ms-discriminator-value": "json_object"
-    },
-    "ChatCompletionsResponseFormatJsonSchema": {
-      "type": "object",
-      "description": "A response format for Chat Completions that restricts responses to emitting valid JSON objects, with a\nJSON schema specified by the caller.",
-      "properties": {
-        "json_schema": {
-          "$ref": "#/definitions/ChatCompletionsResponseFormatJsonSchemaDefinition",
-          "description": "The definition of the required JSON schema in the response, and associated metadata."
-        }
-      },
-      "required": [
-        "json_schema"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatCompletionsResponseFormat"
-        }
-      ],
-      "x-ms-discriminator-value": "json_schema"
-    },
-    "ChatCompletionsResponseFormatJsonSchemaDefinition": {
-      "type": "object",
-      "description": "The definition of the required JSON schema in the response, and associated metadata.",
-      "properties": {
-        "name": {
-          "type": "string",
-          "description": "The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64."
-        },
-        "schema": {
-          "type": "object",
-          "description": "The definition of the JSON schema",
-          "additionalProperties": {}
-        },
-        "description": {
-          "type": "string",
-          "description": "A description of the response format, used by the AI model to determine how to generate responses in this format."
-        },
-        "strict": {
-          "type": "boolean",
-          "description": "Whether to enable strict schema adherence when generating the output.\nIf set to true, the model will always follow the exact schema defined in the `schema` field. Only a subset of\nJSON Schema is supported when `strict` is `true`.",
-          "default": false
-        }
-      },
-      "required": [
-        "name",
-        "schema"
-      ]
-    },
-    "ChatCompletionsResponseFormatText": {
-      "type": "object",
-      "description": "A response format for Chat Completions that emits text responses. This is the default response format.",
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatCompletionsResponseFormat"
-        }
-      ],
-      "x-ms-discriminator-value": "text"
-    },
-    "ChatCompletionsToolCall": {
-      "type": "object",
-      "description": "A function tool call requested by the AI model.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "The ID of the tool call."
-        },
-        "type": {
-          "type": "string",
-          "description": "The type of tool call. Currently, only `function` is supported.",
-          "enum": [
-            "function"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        },
-        "function": {
-          "$ref": "#/definitions/FunctionCall",
-          "description": "The details of the function call requested by the AI model."
-        }
-      },
-      "required": [
-        "id",
-        "type",
-        "function"
-      ]
-    },
-    "ChatCompletionsToolChoicePreset": {
-      "type": "string",
-      "description": "Represents a generic policy for how a chat completions tool may be selected.",
-      "enum": [
-        "auto",
-        "none",
-        "required"
-      ],
-      "x-ms-enum": {
-        "name": "ChatCompletionsToolChoicePreset",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "auto",
-            "value": "auto",
-            "description": "Specifies that the model may either use any of the tools provided in this chat completions request or\ninstead return a standard chat completions response as if no tools were provided."
-          },
-          {
-            "name": "none",
-            "value": "none",
-            "description": "Specifies that the model should not respond with a tool call and should instead provide a standard chat\ncompletions response. Response content may still be influenced by the provided tool definitions."
-          },
-          {
-            "name": "required",
-            "value": "required",
-            "description": "Specifies that the model should respond with a call to one or more tools."
-          }
-        ]
-      }
-    },
-    "ChatCompletionsToolDefinition": {
-      "type": "object",
-      "description": "The definition of a chat completions tool that can call a function.",
-      "properties": {
-        "type": {
-          "type": "string",
-          "description": "The type of the tool. Currently, only `function` is supported.",
-          "enum": [
-            "function"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        },
-        "function": {
-          "$ref": "#/definitions/FunctionDefinition",
-          "description": "The function definition details for the function tool."
-        }
-      },
-      "required": [
-        "type",
-        "function"
-      ]
-    },
-    "ChatMessageAudioContentItem": {
-      "type": "object",
-      "description": "A structured chat content item containing an audio reference.",
-      "properties": {
-        "audio_url": {
-          "$ref": "#/definitions/ChatMessageAudioUrl",
-          "description": "An internet location, which must be accessible to the model, from which the audio may be retrieved.",
-          "x-ms-client-name": "audioUrl"
-        }
-      },
-      "required": [
-        "audio_url"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatMessageContentItem"
-        }
-      ],
-      "x-ms-discriminator-value": "audio_url"
-    },
-    "ChatMessageAudioUrl": {
-      "type": "object",
-      "description": "An internet location from which the model may retrieve audio.",
-      "properties": {
-        "url": {
-          "type": "string",
-          "description": "The URL of the audio."
-        }
-      },
-      "required": [
-        "url"
-      ]
-    },
-    "ChatMessageContentItem": {
-      "type": "object",
-      "description": "An abstract representation of a structured content item within a chat message.",
-      "properties": {
-        "type": {
-          "type": "string",
-          "description": "The discriminated object type."
-        }
-      },
-      "discriminator": "type",
-      "required": [
-        "type"
-      ]
-    },
-    "ChatMessageImageContentItem": {
-      "type": "object",
-      "description": "A structured chat content item containing an image reference.",
-      "properties": {
-        "image_url": {
-          "$ref": "#/definitions/ChatMessageImageUrl",
-          "description": "An internet location, which must be accessible to the model, from which the image may be retrieved.",
-          "x-ms-client-name": "imageUrl"
-        }
-      },
-      "required": [
-        "image_url"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatMessageContentItem"
-        }
-      ],
-      "x-ms-discriminator-value": "image_url"
-    },
-    "ChatMessageImageDetailLevel": {
-      "type": "string",
-      "description": "A representation of the possible image detail levels for image-based chat completions message content.",
-      "enum": [
-        "auto",
-        "low",
-        "high"
-      ],
-      "x-ms-enum": {
-        "name": "ChatMessageImageDetailLevel",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "auto",
-            "value": "auto",
-            "description": "Specifies that the model should determine which detail level to apply using heuristics like image size."
-          },
-          {
-            "name": "low",
-            "value": "low",
-            "description": "Specifies that image evaluation should be constrained to the 'low-res' model that may be faster and consume fewer\ntokens but may also be less accurate for highly detailed images."
-          },
-          {
-            "name": "high",
-            "value": "high",
-            "description": "Specifies that image evaluation should enable the 'high-res' model that may be more accurate for highly detailed\nimages but may also be slower and consume more tokens."
-          }
-        ]
-      }
-    },
-    "ChatMessageImageUrl": {
-      "type": "object",
-      "description": "An internet location from which the model may retrieve an image.",
-      "properties": {
-        "url": {
-          "type": "string",
-          "description": "The URL of the image."
-        },
-        "detail": {
-          "$ref": "#/definitions/ChatMessageImageDetailLevel",
-          "description": "The evaluation quality setting to use, which controls relative prioritization of speed, token consumption, and\naccuracy."
-        }
-      },
-      "required": [
-        "url"
-      ]
-    },
-    "ChatMessageInputAudio": {
-      "type": "object",
-      "description": "The details of an audio chat message content part.",
-      "properties": {
-        "data": {
-          "type": "string",
-          "description": "Base64 encoded audio data"
-        },
-        "format": {
-          "$ref": "#/definitions/AudioContentFormat",
-          "description": "The audio format of the audio content."
-        }
-      },
-      "required": [
-        "data",
-        "format"
-      ]
-    },
-    "ChatMessageInputAudioContentItem": {
-      "type": "object",
-      "description": "A structured chat content item containing audio content.",
-      "properties": {
-        "format": {
-          "$ref": "#/definitions/AudioContentFormat",
-          "description": "The audio format of the audio reference."
-        }
-      },
-      "required": [
-        "format"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatMessageContentItem"
-        }
-      ],
-      "x-ms-discriminator-value": "input_audio"
-    },
-    "ChatMessageTextContentItem": {
-      "type": "object",
-      "description": "A structured chat content item containing plain text.",
-      "properties": {
-        "text": {
-          "type": "string",
-          "description": "The content of the message."
-        }
-      },
-      "required": [
-        "text"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatMessageContentItem"
-        }
-      ],
-      "x-ms-discriminator-value": "text"
-    },
-    "ChatRequestAssistantMessage": {
-      "type": "object",
-      "description": "A request chat message representing response or action from the assistant.",
-      "properties": {
-        "content": {
-          "type": "string",
-          "description": "The content of the message."
-        },
-        "tool_calls": {
-          "type": "array",
-          "description": "The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat\ncompletions request to resolve as configured.",
-          "items": {
-            "$ref": "#/definitions/ChatCompletionsToolCall"
-          },
-          "x-ms-client-name": "toolCalls"
-        },
-        "audio": {
-          "$ref": "#/definitions/ChatRequestAudioReference",
-          "description": "The audio generated by a previous response in a multi-turn conversation."
-        }
-      },
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatRequestMessage"
-        }
-      ],
-      "x-ms-discriminator-value": "assistant"
-    },
-    "ChatRequestAudioReference": {
-      "type": "object",
-      "description": "A reference to an audio response generated by the model.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "Unique identifier for the audio response. This value corresponds to the id of a previous audio completion."
-        }
-      },
-      "required": [
-        "id"
-      ]
-    },
-    "ChatRequestMessage": {
-      "type": "object",
-      "description": "An abstract representation of a chat message as provided in a request.",
-      "properties": {
-        "role": {
-          "$ref": "#/definitions/ChatRole",
-          "description": "The chat role associated with this message."
-        }
-      },
-      "discriminator": "role",
-      "required": [
-        "role"
-      ]
-    },
-    "ChatRequestSystemMessage": {
-      "type": "object",
-      "description": "A request chat message containing system instructions that influence how the model will generate a chat completions\nresponse.",
-      "properties": {
-        "content": {
-          "type": "string",
-          "description": "The contents of the system message."
-        }
-      },
-      "required": [
-        "content"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatRequestMessage"
-        }
-      ],
-      "x-ms-discriminator-value": "system"
-    },
-    "ChatRequestToolMessage": {
-      "type": "object",
-      "description": "A request chat message representing requested output from a configured tool.",
-      "properties": {
-        "content": {
-          "type": "string",
-          "description": "The content of the message."
-        },
-        "tool_call_id": {
-          "type": "string",
-          "description": "The ID of the tool call resolved by the provided content.",
-          "x-ms-client-name": "toolCallId"
-        }
-      },
-      "required": [
-        "tool_call_id"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatRequestMessage"
-        }
-      ],
-      "x-ms-discriminator-value": "tool"
-    },
-    "ChatRequestUserMessage": {
-      "type": "object",
-      "description": "A request chat message representing user input to the assistant.",
-      "properties": {
-        "content": {
-          "description": "The contents of the user message, with available input types varying by selected model."
-        }
-      },
-      "required": [
-        "content"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatRequestMessage"
-        }
-      ],
-      "x-ms-discriminator-value": "user"
-    },
-    "ChatResponseMessage": {
-      "type": "object",
-      "description": "A representation of a chat message as received in a response.",
-      "properties": {
-        "role": {
-          "$ref": "#/definitions/ChatRole",
-          "description": "The chat role associated with the message.",
-          "readOnly": true
-        },
-        "content": {
-          "type": "string",
-          "description": "The content of the message.",
-          "x-nullable": true,
-          "readOnly": true
-        },
-        "reasoning_content": {
-          "type": "string",
-          "description": "The reasoning content the model used for generating the response",
-          "readOnly": true
-        },
-        "tool_calls": {
-          "type": "array",
-          "description": "The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat\ncompletions request to resolve as configured.",
-          "items": {
-            "$ref": "#/definitions/ChatCompletionsToolCall"
-          },
-          "readOnly": true,
-          "x-ms-client-name": "toolCalls"
-        },
-        "audio": {
-          "$ref": "#/definitions/ChatCompletionsAudio",
-          "description": "The audio generated by the model as a response to the messages if the model is configured to generate audio.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "role",
-        "content"
-      ]
-    },
-    "ChatRole": {
-      "type": "string",
-      "description": "A description of the intended purpose of a message within a chat completions interaction.",
-      "enum": [
-        "system",
-        "developer",
-        "user",
-        "assistant",
-        "tool"
-      ],
-      "x-ms-enum": {
-        "name": "ChatRole",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "system",
-            "value": "system",
-            "description": "The role that instructs or sets the behavior of the assistant."
-          },
-          {
-            "name": "developer",
-            "value": "developer",
-            "description": "The role that provides instructions to the model prioritized ahead of user messages."
-          },
-          {
-            "name": "user",
-            "value": "user",
-            "description": "The role that provides input for chat completions."
-          },
-          {
-            "name": "assistant",
-            "value": "assistant",
-            "description": "The role that provides responses to system-instructed, user-prompted input."
-          },
-          {
-            "name": "tool",
-            "value": "tool",
-            "description": "The role that represents extension tool activity within a chat completions operation."
-          }
-        ]
-      }
-    },
-    "CompletionsFinishReason": {
-      "type": "string",
-      "description": "Representation of the manner in which a completions response concluded.",
-      "enum": [
-        "stop",
-        "length",
-        "content_filter",
-        "tool_calls"
-      ],
-      "x-ms-enum": {
-        "name": "CompletionsFinishReason",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "stopped",
-            "value": "stop",
-            "description": "Completions ended normally and reached its end of token generation."
-          },
-          {
-            "name": "tokenLimitReached",
-            "value": "length",
-            "description": "Completions exhausted available token limits before generation could complete."
-          },
-          {
-            "name": "contentFiltered",
-            "value": "content_filter",
-            "description": "Completions generated a response that was identified as potentially sensitive per content\nmoderation policies."
-          },
-          {
-            "name": "toolCalls",
-            "value": "tool_calls",
-            "description": "Completion ended with the model calling a provided tool for output."
-          }
-        ]
-      }
-    },
-    "CompletionsUsage": {
-      "type": "object",
-      "description": "Representation of the token counts processed for a completions request.\nCounts consider all tokens across prompts, choices, choice alternates, best_of generations, and\nother consumers.",
-      "properties": {
-        "completion_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The number of tokens generated across all completions emissions.",
-          "readOnly": true
-        },
-        "prompt_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The number of tokens in the provided prompts for the completions request.",
-          "readOnly": true
-        },
-        "total_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The total number of tokens processed for the completions request and response.",
-          "readOnly": true
-        },
-        "completion_tokens_details": {
-          "$ref": "#/definitions/CompletionsUsageDetails",
-          "description": "Breakdown of tokens used in a completion.",
-          "readOnly": true
-        },
-        "prompt_tokens_details": {
-          "$ref": "#/definitions/PromptUsageDetails",
-          "description": "Breakdown of tokens used in the prompt/chat history.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "completion_tokens",
-        "prompt_tokens",
-        "total_tokens"
-      ]
-    },
-    "CompletionsUsageDetails": {
-      "type": "object",
-      "description": "A breakdown of tokens used in a completion.",
-      "properties": {
-        "audio_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The number of tokens corresponding to audio input.",
-          "readOnly": true
-        },
-        "reasoning_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The number of tokens corresponding to reasoning.",
-          "readOnly": true
-        },
-        "total_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The total number of tokens processed for the completions request and response.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "audio_tokens",
-        "reasoning_tokens",
-        "total_tokens"
-      ]
-    },
-    "EmbeddingEncodingFormat": {
-      "type": "string",
-      "description": "Specifies the types of embeddings to generate. Compressed embedding types like `uint8`, `int8`, `ubinary` and\n`binary` may reduce storage costs without sacrificing the integrity of the data. Returns a 422 error if the\nmodel doesn't support the value or parameter. Read the model's documentation to know the values supported by\nyour model.",
-      "enum": [
-        "base64",
-        "binary",
-        "float",
-        "int8",
-        "ubinary",
-        "uint8"
-      ],
-      "x-ms-enum": {
-        "name": "EmbeddingEncodingFormat",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "base64",
-            "value": "base64",
-            "description": "Get back binary representation of the embeddings encoded as Base64 string. OpenAI Python library retrieves \nembeddings from the API as encoded binary data, rather than using intermediate decimal representations as is \nusually done."
-          },
-          {
-            "name": "binary",
-            "value": "binary",
-            "description": "Get back signed binary embeddings"
-          },
-          {
-            "name": "float",
-            "value": "float",
-            "description": "Get back full precision embeddings"
-          },
-          {
-            "name": "int8",
-            "value": "int8",
-            "description": "Get back signed int8 embeddings"
-          },
-          {
-            "name": "ubinary",
-            "value": "ubinary",
-            "description": "Get back unsigned binary embeddings"
-          },
-          {
-            "name": "uint8",
-            "value": "uint8",
-            "description": "Get back unsigned int8 embeddings"
-          }
-        ]
-      }
-    },
-    "EmbeddingInputType": {
-      "type": "string",
-      "description": "Represents the input types used for embedding search.",
-      "enum": [
-        "text",
-        "query",
-        "document"
-      ],
-      "x-ms-enum": {
-        "name": "EmbeddingInputType",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "text",
-            "value": "text",
-            "description": "Indicates the input is a general text input."
-          },
-          {
-            "name": "query",
-            "value": "query",
-            "description": "Indicates the input represents a search query to find the most relevant documents in your vector database."
-          },
-          {
-            "name": "document",
-            "value": "document",
-            "description": "Indicates the input represents a document that is stored in a vector database."
-          }
-        ]
-      }
-    },
-    "EmbeddingItem": {
-      "type": "object",
-      "description": "Representation of a single embeddings relatedness comparison.",
-      "properties": {
-        "embedding": {
-          "type": "array",
-          "description": "List of embedding values for the input prompt. These represent a measurement of the\nvector-based relatedness of the provided input. Or a base64 encoded string of the embedding vector.",
-          "items": {
-            "type": "number",
-            "format": "float"
-          },
-          "readOnly": true
-        },
-        "index": {
-          "type": "integer",
-          "format": "int32",
-          "description": "Index of the prompt to which the EmbeddingItem corresponds.",
-          "readOnly": true
-        },
-        "object": {
-          "type": "string",
-          "description": "The object type of this embeddings item. Will always be `embedding`.",
-          "enum": [
-            "embedding"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        }
-      },
-      "required": [
-        "embedding",
-        "index",
-        "object"
-      ]
-    },
-    "EmbeddingsOptions": {
-      "type": "object",
-      "description": "The configuration information for an embeddings request.",
-      "properties": {
-        "input": {
-          "type": "array",
-          "description": "Input text to embed, encoded as a string or array of tokens.\nTo embed multiple inputs in a single request, pass an array\nof strings or array of token arrays.",
-          "items": {
-            "type": "string"
-          }
-        },
-        "dimensions": {
-          "type": "integer",
-          "format": "int32",
-          "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter."
-        },
-        "encoding_format": {
-          "$ref": "#/definitions/EmbeddingEncodingFormat",
-          "description": "Optional. The desired format for the returned embeddings."
-        },
-        "input_type": {
-          "$ref": "#/definitions/EmbeddingInputType",
-          "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter."
-        },
-        "model": {
-          "type": "string",
-          "description": "ID of the specific AI model to use, if more than one model is available on the endpoint."
-        }
-      },
-      "required": [
-        "input"
-      ],
-      "additionalProperties": {}
-    },
-    "EmbeddingsResult": {
-      "type": "object",
-      "description": "Representation of the response data from an embeddings request.\nEmbeddings measure the relatedness of text strings and are commonly used for search, clustering,\nrecommendations, and other similar scenarios.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "Unique identifier for the embeddings result.",
-          "readOnly": true
-        },
-        "data": {
-          "type": "array",
-          "description": "Embedding values for the prompts submitted in the request.",
-          "items": {
-            "$ref": "#/definitions/EmbeddingItem"
-          },
-          "readOnly": true,
-          "x-ms-identifiers": []
-        },
-        "usage": {
-          "$ref": "#/definitions/EmbeddingsUsage",
-          "description": "Usage counts for tokens input using the embeddings API.",
-          "readOnly": true
-        },
-        "object": {
-          "type": "string",
-          "description": "The object type of the embeddings result. Will always be `list`.",
-          "enum": [
-            "list"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        },
-        "model": {
-          "type": "string",
-          "description": "The model ID used to generate this result.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "id",
-        "data",
-        "usage",
-        "object",
-        "model"
-      ]
-    },
-    "EmbeddingsUsage": {
-      "type": "object",
-      "description": "Measurement of the amount of tokens used in this request and response.",
-      "properties": {
-        "prompt_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "Number of tokens in the request.",
-          "readOnly": true
-        },
-        "total_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "Total number of tokens transacted in this request/response. Should equal the\nnumber of tokens in the request.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "prompt_tokens",
-        "total_tokens"
-      ]
-    },
-    "ExtraParameters": {
-      "type": "string",
-      "description": "Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload.",
-      "enum": [
-        "error",
-        "drop",
-        "pass-through"
-      ],
-      "x-ms-enum": {
-        "name": "ExtraParameters",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "error",
-            "value": "error",
-            "description": "The service will error if it detected extra parameters in the request payload. This is the service default."
-          },
-          {
-            "name": "drop",
-            "value": "drop",
-            "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model."
-          },
-          {
-            "name": "pass_through",
-            "value": "pass-through",
-            "description": "The service will pass extra parameters to the back-end AI model."
-          }
-        ]
-      }
-    },
-    "FunctionCall": {
-      "type": "object",
-      "description": "The name and arguments of a function that should be called, as generated by the model.",
-      "properties": {
-        "name": {
-          "type": "string",
-          "description": "The name of the function to call.",
-          "readOnly": true
-        },
-        "arguments": {
-          "type": "string",
-          "description": "The arguments to call the function with, as generated by the model in JSON format.\nNote that the model does not always generate valid JSON, and may hallucinate parameters\nnot defined by your function schema. Validate the arguments in your code before calling\nyour function.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "name",
-        "arguments"
-      ]
-    },
-    "FunctionDefinition": {
-      "type": "object",
-      "description": "The definition of a caller-specified function that chat completions may invoke in response to matching user input.",
-      "properties": {
-        "name": {
-          "type": "string",
-          "description": "The name of the function to be called."
-        },
-        "description": {
-          "type": "string",
-          "description": "A description of what the function does. The model will use this description when selecting the function and\ninterpreting its parameters."
-        },
-        "parameters": {
-          "type": "object",
-          "description": "The parameters the function accepts, described as a JSON Schema object.",
-          "additionalProperties": {}
-        }
-      },
-      "required": [
-        "name"
-      ]
-    },
-    "ImageEmbeddingInput": {
-      "type": "object",
-      "description": "Represents an image with optional text.",
-      "properties": {
-        "image": {
-          "type": "string",
-          "description": "The input image encoded in base64 string as a data URL. Example: `data:image/{format};base64,{data}`."
-        },
-        "text": {
-          "type": "string",
-          "description": "Optional. The text input to feed into the model (like DINO, CLIP).\nReturns a 422 error if the model doesn't support the value or parameter."
-        }
-      },
-      "required": [
-        "image"
-      ]
-    },
-    "ImageEmbeddingsOptions": {
-      "type": "object",
-      "description": "The configuration information for an image embeddings request.",
-      "properties": {
-        "input": {
-          "type": "array",
-          "description": "Input image to embed. To embed multiple inputs in a single request, pass an array.\nThe input must not exceed the max input tokens for the model.",
-          "items": {
-            "$ref": "#/definitions/ImageEmbeddingInput"
-          },
-          "x-ms-identifiers": []
-        },
-        "dimensions": {
-          "type": "integer",
-          "format": "int32",
-          "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter."
-        },
-        "encoding_format": {
-          "$ref": "#/definitions/EmbeddingEncodingFormat",
-          "description": "Optional. The desired format for the returned embeddings.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter."
-        },
-        "input_type": {
-          "$ref": "#/definitions/EmbeddingInputType",
-          "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter."
-        },
-        "model": {
-          "type": "string",
-          "description": "ID of the specific AI model to use, if more than one model is available on the endpoint."
-        }
-      },
-      "required": [
-        "input"
-      ],
-      "additionalProperties": {}
-    },
-    "ImageGenerationItem": {
-      "type": "object",
-      "description": "Representation of a single image generation.",
-      "properties": {
-        "image": {
-          "type": "string",
-          "description": "The image generated, encoded in base64."
-        },
-        "seed": {
-          "type": "integer",
-          "format": "int64",
-          "description": "The seed that can be used to generate the image."
-        }
-      },
-      "required": [
-        "image"
-      ]
-    },
-    "ImageGenerationOutputFormat": {
-      "type": "string",
-      "description": "The image generation format to use in the output.",
-      "enum": [
-        "png",
-        "jpg"
-      ],
-      "x-ms-enum": {
-        "name": "ImageGenerationOutputFormat",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "png",
-            "value": "png",
-            "description": "Generates images in PNG format."
-          },
-          {
-            "name": "jpg",
-            "value": "jpg",
-            "description": "Generates images in JPEG format."
-          }
-        ]
-      }
-    },
-    "ImageGenerationPreset": {
-      "type": "string",
-      "description": "The preset size of the image to generate.",
-      "enum": [
-        "672x1566",
-        "768x1366",
-        "836x1254",
-        "916x1145",
-        "1024x1024",
-        "1145x916",
-        "1254x836",
-        "1366x768",
-        "1566x672"
-      ],
-      "x-ms-enum": {
-        "name": "ImageGenerationPreset",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "nine_twenty_one",
-            "value": "672x1566",
-            "description": "The size of the image is 672x1566 pixels."
-          },
-          {
-            "name": "nine_sixteen",
-            "value": "768x1366",
-            "description": "The size of the image is 768x1366 pixels."
-          },
-          {
-            "name": "two_three",
-            "value": "836x1254",
-            "description": "The size of the image is 836x1254 pixels."
-          },
-          {
-            "name": "four_five",
-            "value": "916x1145",
-            "description": "The size of the image is 916x1145 pixels."
-          },
-          {
-            "name": "one_one",
-            "value": "1024x1024",
-            "description": "The size of the image is 1024x1024 pixels."
-          },
-          {
-            "name": "five_four",
-            "value": "1145x916",
-            "description": "The size of the image is 1145x916 pixels."
-          },
-          {
-            "name": "three_two",
-            "value": "1254x836",
-            "description": "The size of the image is 1254x836 pixels."
-          },
-          {
-            "name": "sixteen_nine",
-            "value": "1366x768",
-            "description": "The size of the image is 1366x768 pixels."
-          },
-          {
-            "name": "twenty_one_nine",
-            "value": "1566x672",
-            "description": "The size of the image is 1566x672 pixels."
-          }
-        ]
-      }
-    },
-    "ImageGenerationPrompt": {
-      "type": "object",
-      "description": "Represents an image to use for guidance during the generation.",
-      "properties": {
-        "image": {
-          "type": "string",
-          "description": "The input image encoded in base64 string as a data URL.\nExample: `data:image/{format};base64,{data}`."
-        },
-        "strength": {
-          "type": "number",
-          "format": "float",
-          "description": "The degree at which the generation process uses the image prompt as guidance. 1 indicates\na generation process that fully follows the input image. 0 indicates a generation process that doesn't\ntake the prompt image into consideration.",
-          "default": 1
-        }
-      },
-      "required": [
-        "image"
-      ]
-    },
-    "ImageGenerationQuality": {
-      "type": "string",
-      "description": "The quality of the image to generate.",
-      "enum": [
-        "hd",
-        "standard"
-      ],
-      "x-ms-enum": {
-        "name": "ImageGenerationQuality",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "hd",
-            "value": "hd",
-            "description": "Generates images with the HD quality."
-          },
-          {
-            "name": "standard",
-            "value": "standard",
-            "description": "Generates images with standard quality."
-          }
-        ]
-      }
-    },
-    "ImageGenerationSize": {
-      "type": "object",
-      "description": "The size of the image to generate.",
-      "properties": {
-        "width": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The width of the image to generate, in pixels."
-        },
-        "height": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The height of the image to generate, in pixels."
-        }
-      },
-      "required": [
-        "width",
-        "height"
-      ]
-    },
-    "ImageGenerationUsage": {
-      "type": "object",
-      "description": "Usage counts for image generation API.",
-      "properties": {
-        "image_generations": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The number of image generations performed."
-        }
-      },
-      "required": [
-        "image_generations"
-      ]
-    },
-    "ImageGenerationsOptions": {
-      "type": "object",
-      "description": "The configuration information for an image generation request.",
-      "properties": {
-        "prompt": {
-          "type": "string",
-          "description": "The prompt to use for the image generation. Read the model documentation to understand\nwhich language you should use to prompt the model and get specific results."
-        },
-        "negative_prompt": {
-          "type": "string",
-          "description": "Optional. The prompt to use for the negative image generation. Read the model documentation\nto understand how to prompt the model and get specific results. If the model doesn't support\nnegative prompts, a 422 error is returned."
-        },
-        "image_prompt": {
-          "$ref": "#/definitions/ImageGenerationPrompt",
-          "description": "Optional. An image to use as guidance for the image generation process."
-        },
-        "size": {
-          "description": "The size of the image to generate."
-        },
-        "output_format": {
-          "type": "string",
-          "description": "Optional. The format in which to generate the image.\nReturns a 422 error if the model doesn't support the value or parameter.",
-          "default": "png",
-          "enum": [
-            "png",
-            "jpg"
-          ],
-          "x-ms-enum": {
-            "name": "ImageGenerationOutputFormat",
-            "modelAsString": true,
-            "values": [
-              {
-                "name": "png",
-                "value": "png",
-                "description": "Generates images in PNG format."
-              },
-              {
-                "name": "jpg",
-                "value": "jpg",
-                "description": "Generates images in JPEG format."
-              }
-            ]
-          }
-        },
-        "quality": {
-          "type": "string",
-          "description": "Optional. The quality of the image to generate.\nReturns a 422 error if the model doesn't support the value or parameter.",
-          "default": "standard",
-          "enum": [
-            "hd",
-            "standard"
-          ],
-          "x-ms-enum": {
-            "name": "ImageGenerationQuality",
-            "modelAsString": true,
-            "values": [
-              {
-                "name": "hd",
-                "value": "hd",
-                "description": "Generates images with the HD quality."
-              },
-              {
-                "name": "standard",
-                "value": "standard",
-                "description": "Generates images with standard quality."
-              }
-            ]
-          }
-        },
-        "n": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The number of images to generate. Defaults to 1.",
-          "default": 1
-        },
-        "model": {
-          "type": "string",
-          "description": "ID of the specific AI model to use, if more than one model is available on the endpoint."
-        },
-        "seed": {
-          "type": "integer",
-          "format": "int64",
-          "description": "If specified, the system will make a best effort to sample deterministically such that repeated requests with the\nsame seed and parameters should return the same result. Determinism is not guaranteed."
-        }
-      },
-      "required": [
-        "prompt",
-        "size",
-        "n"
-      ],
-      "additionalProperties": {}
-    },
-    "ImageGenerationsResult": {
-      "type": "object",
-      "description": "The result of an image generation request.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "Unique identifier for the generation result."
-        },
-        "created": {
-          "type": "integer",
-          "format": "unixtime",
-          "description": "The first timestamp associated with generation activity for this image generation response,\nrepresented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970."
-        },
-        "model": {
-          "type": "string",
-          "description": "The model used for the image generation."
-        },
-        "data": {
-          "type": "array",
-          "description": "The images generated for the prompt submitted in the request.",
-          "items": {
-            "$ref": "#/definitions/ImageGenerationItem"
-          },
-          "x-ms-identifiers": []
-        },
-        "usage": {
-          "$ref": "#/definitions/ImageGenerationUsage",
-          "description": "Usage counts for the image generation API."
-        },
-        "object": {
-          "type": "string",
-          "description": "The object type of the image generation result. Will always be `image.generation`.",
-          "enum": [
-            "image.generation"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        }
-      },
-      "required": [
-        "id",
-        "created",
-        "model",
-        "data",
-        "usage",
-        "object"
-      ]
-    },
-    "ModelInfo": {
-      "type": "object",
-      "description": "Represents some basic information about the AI model.",
-      "properties": {
-        "model_name": {
-          "type": "string",
-          "description": "The name of the AI model. For example: `Phi21`",
-          "readOnly": true
-        },
-        "model_type": {
-          "$ref": "#/definitions/ModelType",
-          "description": "The type of the AI model. A Unique identifier for the profile.",
-          "readOnly": true
-        },
-        "model_provider_name": {
-          "type": "string",
-          "description": "The model provider name. For example: `Microsoft`",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "model_name",
-        "model_type",
-        "model_provider_name"
-      ]
-    },
-    "ModelType": {
-      "type": "string",
-      "description": "The type of AI model",
-      "enum": [
-        "embeddings",
-        "chat-completion"
-      ],
-      "x-ms-enum": {
-        "name": "ModelType",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "embeddings",
-            "value": "embeddings",
-            "description": "A model capable of generating embeddings from a text"
-          },
-          {
-            "name": "chat_completion",
-            "value": "chat-completion",
-            "description": "A model capable of taking chat-formatted messages and generating responses"
-          }
-        ]
-      }
-    },
-    "PromptUsageDetails": {
-      "type": "object",
-      "description": "A breakdown of tokens used in the prompt/chat history.",
-      "properties": {
-        "audio_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The number of tokens corresponding to audio input.",
-          "readOnly": true
-        },
-        "cached_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The total number of tokens cached.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "audio_tokens",
-        "cached_tokens"
-      ]
-    },
-    "StreamingChatChoiceUpdate": {
-      "type": "object",
-      "description": "Represents an update to a single prompt completion when the service is streaming updates \nusing Server Sent Events (SSE).\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.",
-      "properties": {
-        "index": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The ordered index associated with this chat completions choice."
-        },
-        "finish_reason": {
-          "$ref": "#/definitions/CompletionsFinishReason",
-          "description": "The reason that this chat completions choice completed its generation.",
-          "x-nullable": true,
-          "readOnly": true
-        },
-        "delta": {
-          "$ref": "#/definitions/StreamingChatResponseMessageUpdate",
-          "description": "An update to the chat message for a given chat completions prompt.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "index",
-        "finish_reason",
-        "delta"
-      ]
-    },
-    "StreamingChatCompletionsUpdate": {
-      "type": "object",
-      "description": "Represents a response update to a chat completions request, when the service is streaming updates \nusing Server Sent Events (SSE).\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "A unique identifier associated with this chat completions response."
-        },
-        "object": {
-          "type": "string",
-          "description": "The response object type, which is always `chat.completion`.",
-          "enum": [
-            "chat.completion"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        },
-        "created": {
-          "type": "integer",
-          "format": "unixtime",
-          "description": "The first timestamp associated with generation activity for this completions response,\nrepresented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.",
-          "readOnly": true
-        },
-        "model": {
-          "type": "string",
-          "description": "The model used for the chat completion.",
-          "readOnly": true
-        },
-        "choices": {
-          "type": "array",
-          "description": "An update to the collection of completion choices associated with this completions response.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.",
-          "minItems": 1,
-          "items": {
-            "$ref": "#/definitions/StreamingChatChoiceUpdate"
-          },
-          "readOnly": true,
-          "x-ms-identifiers": []
-        },
-        "usage": {
-          "$ref": "#/definitions/CompletionsUsage",
-          "description": "Usage information for tokens processed and generated as part of this completions operation.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "id",
-        "object",
-        "created",
-        "model",
-        "choices"
-      ]
-    },
-    "StreamingChatResponseMessageUpdate": {
-      "type": "object",
-      "description": "A representation of a chat message update as received in a streaming response.",
-      "properties": {
-        "role": {
-          "$ref": "#/definitions/ChatRole",
-          "description": "The chat role associated with the message. If present, should always be 'assistant'",
-          "readOnly": true
-        },
-        "content": {
-          "type": "string",
-          "description": "The content of the message.",
-          "readOnly": true
-        },
-        "reasoning_content": {
-          "type": "string",
-          "description": "The reasoning content the model used for generating the response",
-          "readOnly": true
-        },
-        "tool_calls": {
-          "type": "array",
-          "description": "The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat\ncompletions request to resolve as configured.",
-          "items": {
-            "$ref": "#/definitions/StreamingChatResponseToolCallUpdate"
-          },
-          "readOnly": true,
-          "x-ms-client-name": "toolCalls"
-        }
-      }
-    },
-    "StreamingChatResponseToolCallUpdate": {
-      "type": "object",
-      "description": "An update to the function tool call information requested by the AI model.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "The ID of the tool call.",
-          "readOnly": true
-        },
-        "function": {
-          "$ref": "#/definitions/FunctionCall",
-          "description": "Updates to the function call requested by the AI model.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "id",
-        "function"
-      ]
-    },
-    "UserSecurityContext": {
-      "type": "object",
-      "description": "User security context contains several parameters that describe the AI application itself, and the end user that interacts with the AI application. \nThese fields assist your security operations teams to investigate and mitigate security incidents by providing a comprehensive approach to protecting your AI applications. \n[Learn more](https://aka.ms/TP4AI/Documentation/EndUserContext) about protecting AI applications using Microsoft Defender for Cloud.",
-      "properties": {
-        "application_name": {
-          "type": "string",
-          "description": "The name of the application. Sensitive personal information should not be included in this field.",
-          "maxLength": 100
-        },
-        "end_user_id": {
-          "type": "string",
-          "description": "This identifier is the Microsoft Entra ID (formerly Azure Active Directory) user object ID used to authenticate end-users within the generative AI application. Sensitive personal information should not be included in this field.",
-          "minLength": 36,
-          "maxLength": 36,
-          "pattern": "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
-        },
-        "end_user_tenant_id": {
-          "type": "string",
-          "description": "The Microsoft 365 tenant ID the end user belongs to. It's required when the generative AI application is multi tenant.",
-          "minLength": 36,
-          "maxLength": 36,
-          "pattern": "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
-        },
-        "source_ip": {
-          "type": "string",
-          "description": "Captures the original client's IP address, accepting both IPv4 and IPv6 formats.",
-          "minLength": 2,
-          "maxLength": 45
-        }
-      }
-    }
-  },
-  "parameters": {
-    "Azure.Core.Foundations.ApiVersionParameter": {
-      "name": "api-version",
-      "in": "query",
-      "description": "The API version to use for this operation.",
-      "required": true,
-      "type": "string",
-      "minLength": 1,
-      "x-ms-parameter-location": "method",
-      "x-ms-client-name": "apiVersion"
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/readme.md b/specification/ai/data-plane/ModelInference/readme.md
index afb449f03f69..172805b24d20 100644
--- a/specification/ai/data-plane/ModelInference/readme.md
+++ b/specification/ai/data-plane/ModelInference/readme.md
@@ -4,10 +4,10 @@
 
 This is the AutoRest configuration file for Model Inference API.
 
-The current release is `2025-05-01`.
+The current release is `2024-05-01-preview`.
 
 ```yaml
-tag: 2025-05-01
+tag: 2024-05-01-preview
 openapi-type: data-plane
 ```
 
@@ -26,54 +26,3 @@ suppressions:
     reason: OpenAI compatible API, which uses unixTimeStamp
 ```
 
-### Release 2025-04-01
-
-These settings apply only when `--tag=2025-04-01` is specified on the command line.
-
-```yaml $(tag) == '2025-04-01'
-input-file:
-  - stable/2025-04-01/openapi.json
-suppressions:
-  - code: IntegerTypeMustHaveFormat
-    from: openapi.json
-    reason: OpenAI compatible API, which uses unixTimeStamp
-```
-
-### Release 2025-05-01
-
-These settings apply only when `--tag=2025-05-01` is specified on the command line.
-
-```yaml $(tag) == '2025-05-01'
-input-file:
-  - stable/2025-05-01/openapi.json
-suppressions:
-  - code: IntegerTypeMustHaveFormat
-    from: openapi.json
-    reason: OpenAI compatible API, which uses unixTimeStamp
-  - code: PropertyType
-    from: openapi.json
-    reason: External API shape is defined in OpenAPI 3.0 as oneOf. 2.0 doesn't support union.
-  - code: EnumInsteadOfBoolean
-    from: openapi.json
-    reason: OpenAI compatible API, which uses boolean.
-```
-
-### Release 2025-05-15-preview
-
-These settings apply only when `--tag=2025-05-15-preview` is specified on the command line.
-
-```yaml $(tag) == '2025-05-15-preview'
-input-file:
-  - preview/2025-05-15-preview/openapi.json
-suppressions:
-  - code: IntegerTypeMustHaveFormat
-    from: openapi.json
-    reason: OpenAI compatible API, which uses unixTimeStamp
-  - code: PropertyType
-    from: openapi.json
-    reason: External API shape is defined in OpenAPI 3.0 as oneOf. 2.0 doesn't support union.
-  - code: EnumInsteadOfBoolean
-    from: openapi.json
-    reason: OpenAI compatible API, which uses boolean.
-```
-
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetChatCompletions_AudioModality_Gen.json b/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetChatCompletions_AudioModality_Gen.json
deleted file mode 100644
index 01ebe10c8908..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetChatCompletions_AudioModality_Gen.json
+++ /dev/null
@@ -1,99 +0,0 @@
-{
-  "title": "Audio modality chat completion",
-  "operationId": "GetChatCompletions",
-  "parameters": {
-    "api-version": "2025-04-01",
-    "extra-parameters": "error",
-    "body": {
-      "modalities": [
-        "text",
-        "audio"
-      ],
-      "messages": [
-        {
-          "role": "system",
-          "content": "You are a helpful assistant"
-        },
-        {
-          "role": "user",
-          "content": [
-            {
-              "type": "input_audio",
-              "input_audio": {
-                "data": "<base64 encoded audio data>",
-                "format": "wav"
-              }
-            }
-          ]
-        },
-        {
-          "role": "assistant",
-          "content": null,
-          "audio": {
-            "id": "abcdef1234"
-          }
-        },
-        {
-          "role": "user",
-          "content": [
-            {
-              "type": "input_audio",
-              "input_audio": {
-                "data": "<base64 encoded audio data>",
-                "format": "wav"
-              }
-            }
-          ]
-        }
-      ],
-      "frequency_penalty": 0,
-      "presence_penalty": 0,
-      "temperature": 0,
-      "top_p": 0,
-      "seed": 21,
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "kgousajxgzyhugvqekuswuqbk",
-        "object": "chat.completion",
-        "created": 1696522361,
-        "model": "my-model-name",
-        "usage": {
-          "completion_tokens": 19,
-          "prompt_tokens": 28,
-          "total_tokens": 16,
-          "completion_tokens_details": {
-            "audio_tokens": 5,
-            "reasoning_tokens": 0,
-            "total_tokens": 5
-          },
-          "prompt_tokens_details": {
-            "audio_tokens": 10,
-            "cached_tokens": 0
-          }
-        },
-        "choices": [
-          {
-            "index": 0,
-            "finish_reason": "stop",
-            "message": {
-              "role": "assistant",
-              "content": null,
-              "tool_calls": null,
-              "audio": {
-                "id": "abcdef1234",
-                "format": "wav",
-                "data": "<base64 encoded audio data>",
-                "expires_at": 1896522361,
-                "transcript": "This is a sample transcript"
-              }
-            }
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetChatCompletions_MaximumSet_Gen.json b/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetChatCompletions_MaximumSet_Gen.json
deleted file mode 100644
index 1ce6d85f1c7e..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetChatCompletions_MaximumSet_Gen.json
+++ /dev/null
@@ -1,95 +0,0 @@
-{
-  "title": "maximum set chat completion",
-  "operationId": "GetChatCompletions",
-  "parameters": {
-    "api-version": "2025-04-01",
-    "extra-parameters": "error",
-    "body": {
-      "modalities": [
-        "text"
-      ],
-      "messages": [
-        {
-          "role": "system",
-          "content": "You are a helpful assistant"
-        },
-        {
-          "role": "user",
-          "content": "Explain Riemann's conjecture"
-        },
-        {
-          "role": "assistant",
-          "content": "The Riemann Conjecture is a deep mathematical conjecture around prime numbers and how they can be predicted. It was first published in Riemann's groundbreaking 1859 paper. The conjecture states that the Riemann zeta function has its zeros only at the negative even integers and complex numbers with real part 1/21. Many consider it to be the most important unsolved problem in pure mathematics. The Riemann hypothesis is a way to predict the probability that numbers in a certain range are prime that was also devised by German mathematician Bernhard Riemann in 18594."
-        },
-        {
-          "role": "user",
-          "content": "Ist it proved?"
-        }
-      ],
-      "frequency_penalty": 0,
-      "stream": true,
-      "presence_penalty": 0,
-      "temperature": 0,
-      "top_p": 0,
-      "max_tokens": 255,
-      "response_format": {
-        "type": "text"
-      },
-      "stop": [
-        "<|endoftext|>"
-      ],
-      "tools": [
-        {
-          "type": "function",
-          "function": {
-            "name": "my-function-name",
-            "description": "A function useful to know if a theroem is proved or not"
-          }
-        }
-      ],
-      "seed": 21,
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "kgousajxgzyhugvqekuswuqbk",
-        "object": "chat.completion",
-        "created": 18,
-        "model": "my-model-name",
-        "usage": {
-          "completion_tokens": 19,
-          "prompt_tokens": 28,
-          "total_tokens": 16,
-          "completion_tokens_details": {
-            "audio_tokens": 5,
-            "reasoning_tokens": 0,
-            "total_tokens": 5
-          }
-        },
-        "choices": [
-          {
-            "index": 7,
-            "finish_reason": "stop",
-            "message": {
-              "role": "assistant",
-              "content": null,
-              "reasoning_content": null,
-              "tool_calls": [
-                {
-                  "id": "yrobmilsrugmbwukmzo",
-                  "type": "function",
-                  "function": {
-                    "name": "my-function-name",
-                    "arguments": "{ \"arg1\": \"value1\", \"arg2\": \"value2\" }"
-                  }
-                }
-              ]
-            }
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetChatCompletions_MinimumSet_Gen.json b/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetChatCompletions_MinimumSet_Gen.json
deleted file mode 100644
index 75fce220d10e..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetChatCompletions_MinimumSet_Gen.json
+++ /dev/null
@@ -1,40 +0,0 @@
-{
-  "title": "minimum set chat completion",
-  "operationId": "GetChatCompletions",
-  "parameters": {
-    "api-version": "2025-04-01",
-    "body": {
-      "messages": [
-        {
-          "role": "user",
-          "content": "Explain Riemann's conjecture"
-        }
-      ]
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "kgousajxgzyhugvqekuswuqbk",
-        "object": "chat.completion",
-        "created": 1234567890,
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 205,
-          "completion_tokens": 5,
-          "total_tokens": 210
-        },
-        "choices": [
-          {
-            "index": 0,
-            "finish_reason": "stop",
-            "message": {
-              "role": "assistant",
-              "content": "The Riemann Conjecture is a deep mathematical conjecture around prime numbers and how they can be predicted. It was first published in Riemann's groundbreaking 1859 paper. The conjecture states that the Riemann zeta function has its zeros only at the negative even integers and complex numbers with real part 1/21. Many consider it to be the most important unsolved problem in pure mathematics. The Riemann hypothesis is a way to predict the probability that numbers in a certain range are prime that was also devised by German mathematician Bernhard Riemann in 18594"
-            }
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetEmbeddings_MaximumSet_Gen.json b/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetEmbeddings_MaximumSet_Gen.json
deleted file mode 100644
index 67e4b8a1440e..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetEmbeddings_MaximumSet_Gen.json
+++ /dev/null
@@ -1,50 +0,0 @@
-{
-  "title": "maximum set embeddings",
-  "operationId": "GetEmbeddings",
-  "parameters": {
-    "api-version": "2025-04-01",
-    "extra-parameters": "error",
-    "body": {
-      "input": [
-        "This is a very good text"
-      ],
-      "dimensions": 1024,
-      "encoding_format": "float",
-      "input_type": "text",
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetEmbeddings_MinimumSet_Gen.json b/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetEmbeddings_MinimumSet_Gen.json
deleted file mode 100644
index 7225ec8a8c14..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetEmbeddings_MinimumSet_Gen.json
+++ /dev/null
@@ -1,45 +0,0 @@
-{
-  "title": "minimum set embeddings",
-  "operationId": "GetEmbeddings",
-  "parameters": {
-    "api-version": "2025-04-01",
-    "body": {
-      "input": [
-        "This is a very good text"
-      ]
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetImageEmbeddings_MaximumSet_Gen.json b/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetImageEmbeddings_MaximumSet_Gen.json
deleted file mode 100644
index 7bf59bb84883..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetImageEmbeddings_MaximumSet_Gen.json
+++ /dev/null
@@ -1,53 +0,0 @@
-{
-  "title": "maximum set image embeddings",
-  "operationId": "GetImageEmbeddings",
-  "parameters": {
-    "api-version": "2025-04-01",
-    "extra-parameters": "error",
-    "body": {
-      "input": [
-        {
-          "image": "puqkvvlvgcjyzughesnkena",
-          "text": "azrzyjsmnuefqpowpvfmyobeehqsni"
-        }
-      ],
-      "dimensions": 1024,
-      "encoding_format": "float",
-      "input_type": "text",
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetImageEmbeddings_MinimumSet_Gen.json b/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetImageEmbeddings_MinimumSet_Gen.json
deleted file mode 100644
index c9963a729a2b..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetImageEmbeddings_MinimumSet_Gen.json
+++ /dev/null
@@ -1,47 +0,0 @@
-{
-  "title": "minimum set image embeddings",
-  "operationId": "GetImageEmbeddings",
-  "parameters": {
-    "api-version": "2025-04-01",
-    "body": {
-      "input": [
-        {
-          "image": "gvmojtfooxixxzayrditjlyymg"
-        }
-      ]
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetModelInfo_MaximumSet_Gen.json b/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetModelInfo_MaximumSet_Gen.json
deleted file mode 100644
index 6a94e8c02b7a..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetModelInfo_MaximumSet_Gen.json
+++ /dev/null
@@ -1,17 +0,0 @@
-{
-  "title": "maximum set model information",
-  "operationId": "GetModelInfo",
-  "parameters": {
-    "api-version": "2025-04-01",
-    "model": "Phi-3.5-mini-instruct"
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "model_name": "Phi-3.5-mini-instruct",
-        "model_type": "chat-completion",
-        "model_provider_name": "Microsoft"
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetModelInfo_MinimumSet_Gen.json b/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetModelInfo_MinimumSet_Gen.json
deleted file mode 100644
index c5c343992c7d..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-04-01/examples/GetModelInfo_MinimumSet_Gen.json
+++ /dev/null
@@ -1,17 +0,0 @@
-{
-  "title": "minimum set model information",
-  "operationId": "GetModelInfo",
-  "parameters": {
-    "api-version": "2025-04-01",
-    "model": "Phi-3.5-mini-instruct"
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "model_name": "Phi-3.5-mini-instruct",
-        "model_type": "chat-completion",
-        "model_provider_name": "Microsoft"
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-04-01/openapi.json b/specification/ai/data-plane/ModelInference/stable/2025-04-01/openapi.json
deleted file mode 100644
index f74221d9f513..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-04-01/openapi.json
+++ /dev/null
@@ -1,1918 +0,0 @@
-{
-  "swagger": "2.0",
-  "info": {
-    "title": "AI Model Inference",
-    "version": "2025-04-01",
-    "x-typespec-generated": [
-      {
-        "emitter": "@azure-tools/typespec-autorest"
-      }
-    ]
-  },
-  "schemes": [
-    "https"
-  ],
-  "x-ms-parameterized-host": {
-    "hostTemplate": "https://{resource}.services.ai.azure.com/api/models",
-    "useSchemePrefix": false,
-    "parameters": [
-      {
-        "name": "resource",
-        "in": "path",
-        "description": "The Azure AI Services resource name, for example 'my-resource'",
-        "required": true,
-        "type": "string"
-      }
-    ]
-  },
-  "produces": [
-    "application/json"
-  ],
-  "consumes": [
-    "application/json"
-  ],
-  "security": [
-    {
-      "ApiKeyAuth": []
-    },
-    {
-      "OAuth2Auth": [
-        "https://cognitiveservices.azure.com/.default"
-      ]
-    }
-  ],
-  "securityDefinitions": {
-    "ApiKeyAuth": {
-      "type": "apiKey",
-      "name": "api-key",
-      "in": "header"
-    },
-    "OAuth2Auth": {
-      "type": "oauth2",
-      "flow": "implicit",
-      "authorizationUrl": "https://login.microsoftonline.com/common/oauth2/v2.0/authorize",
-      "scopes": {
-        "https://cognitiveservices.azure.com/.default": ""
-      }
-    }
-  },
-  "tags": [],
-  "paths": {
-    "/chat/completions": {
-      "post": {
-        "operationId": "GetChatCompletions",
-        "description": "Gets chat completions for the provided chat messages.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data. The method makes a REST API call to the `/chat/completions` route\non the given endpoint.",
-        "parameters": [
-          {
-            "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter"
-          },
-          {
-            "name": "extra-parameters",
-            "in": "header",
-            "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.",
-            "required": false,
-            "type": "string",
-            "enum": [
-              "error",
-              "drop",
-              "pass-through"
-            ],
-            "x-ms-enum": {
-              "name": "ExtraParameters",
-              "modelAsString": true,
-              "values": [
-                {
-                  "name": "error",
-                  "value": "error",
-                  "description": "The service will error if it detected extra parameters in the request payload. This is the service default."
-                },
-                {
-                  "name": "drop",
-                  "value": "drop",
-                  "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model."
-                },
-                {
-                  "name": "pass_through",
-                  "value": "pass-through",
-                  "description": "The service will pass extra parameters to the back-end AI model."
-                }
-              ]
-            },
-            "x-ms-client-name": "extra_params"
-          },
-          {
-            "name": "body",
-            "in": "body",
-            "description": "The parameters of the chat completions request.",
-            "required": true,
-            "schema": {
-              "$ref": "#/definitions/ChatCompletionsOptions"
-            }
-          }
-        ],
-        "responses": {
-          "200": {
-            "description": "The request has succeeded.",
-            "schema": {
-              "$ref": "#/definitions/ChatCompletions"
-            }
-          },
-          "default": {
-            "description": "An unexpected error response.",
-            "schema": {
-              "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse"
-            },
-            "headers": {
-              "x-ms-error-code": {
-                "type": "string",
-                "description": "String error code indicating what went wrong."
-              }
-            }
-          }
-        },
-        "x-ms-examples": {
-          "Audio modality chat completion": {
-            "$ref": "./examples/GetChatCompletions_AudioModality_Gen.json"
-          },
-          "maximum set chat completion": {
-            "$ref": "./examples/GetChatCompletions_MaximumSet_Gen.json"
-          },
-          "minimum set chat completion": {
-            "$ref": "./examples/GetChatCompletions_MinimumSet_Gen.json"
-          }
-        }
-      }
-    },
-    "/embeddings": {
-      "post": {
-        "operationId": "GetEmbeddings",
-        "description": "Return the embedding vectors for given text prompts.\nThe method makes a REST API call to the `/embeddings` route on the given endpoint.",
-        "parameters": [
-          {
-            "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter"
-          },
-          {
-            "name": "extra-parameters",
-            "in": "header",
-            "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.",
-            "required": false,
-            "type": "string",
-            "enum": [
-              "error",
-              "drop",
-              "pass-through"
-            ],
-            "x-ms-enum": {
-              "name": "ExtraParameters",
-              "modelAsString": true,
-              "values": [
-                {
-                  "name": "error",
-                  "value": "error",
-                  "description": "The service will error if it detected extra parameters in the request payload. This is the service default."
-                },
-                {
-                  "name": "drop",
-                  "value": "drop",
-                  "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model."
-                },
-                {
-                  "name": "pass_through",
-                  "value": "pass-through",
-                  "description": "The service will pass extra parameters to the back-end AI model."
-                }
-              ]
-            },
-            "x-ms-client-name": "extra_params"
-          },
-          {
-            "name": "body",
-            "in": "body",
-            "description": "The parameters of the embeddings request.",
-            "required": true,
-            "schema": {
-              "$ref": "#/definitions/EmbeddingsOptions"
-            }
-          }
-        ],
-        "responses": {
-          "200": {
-            "description": "The request has succeeded.",
-            "schema": {
-              "$ref": "#/definitions/EmbeddingsResult"
-            }
-          },
-          "default": {
-            "description": "An unexpected error response.",
-            "schema": {
-              "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse"
-            },
-            "headers": {
-              "x-ms-error-code": {
-                "type": "string",
-                "description": "String error code indicating what went wrong."
-              }
-            }
-          }
-        },
-        "x-ms-examples": {
-          "maximum set embeddings": {
-            "$ref": "./examples/GetEmbeddings_MaximumSet_Gen.json"
-          },
-          "minimum set embeddings": {
-            "$ref": "./examples/GetEmbeddings_MinimumSet_Gen.json"
-          }
-        }
-      }
-    },
-    "/images/embeddings": {
-      "post": {
-        "operationId": "GetImageEmbeddings",
-        "description": "Return the embedding vectors for given images.\nThe method makes a REST API call to the `/images/embeddings` route on the given endpoint.",
-        "parameters": [
-          {
-            "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter"
-          },
-          {
-            "name": "extra-parameters",
-            "in": "header",
-            "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.",
-            "required": false,
-            "type": "string",
-            "enum": [
-              "error",
-              "drop",
-              "pass-through"
-            ],
-            "x-ms-enum": {
-              "name": "ExtraParameters",
-              "modelAsString": true,
-              "values": [
-                {
-                  "name": "error",
-                  "value": "error",
-                  "description": "The service will error if it detected extra parameters in the request payload. This is the service default."
-                },
-                {
-                  "name": "drop",
-                  "value": "drop",
-                  "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model."
-                },
-                {
-                  "name": "pass_through",
-                  "value": "pass-through",
-                  "description": "The service will pass extra parameters to the back-end AI model."
-                }
-              ]
-            },
-            "x-ms-client-name": "extra_params"
-          },
-          {
-            "name": "body",
-            "in": "body",
-            "description": "The parameters of the image embeddings request.",
-            "required": true,
-            "schema": {
-              "$ref": "#/definitions/ImageEmbeddingsOptions"
-            }
-          }
-        ],
-        "responses": {
-          "200": {
-            "description": "The request has succeeded.",
-            "schema": {
-              "$ref": "#/definitions/EmbeddingsResult"
-            }
-          },
-          "default": {
-            "description": "An unexpected error response.",
-            "schema": {
-              "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse"
-            },
-            "headers": {
-              "x-ms-error-code": {
-                "type": "string",
-                "description": "String error code indicating what went wrong."
-              }
-            }
-          }
-        },
-        "x-ms-examples": {
-          "maximum set image embeddings": {
-            "$ref": "./examples/GetImageEmbeddings_MaximumSet_Gen.json"
-          },
-          "minimum set image embeddings": {
-            "$ref": "./examples/GetImageEmbeddings_MinimumSet_Gen.json"
-          }
-        }
-      }
-    },
-    "/info": {
-      "get": {
-        "operationId": "GetModelInfo",
-        "description": "Returns information about the AI model deployed.\nThe method makes a REST API call to the `/info` route on the given endpoint.\nThis method will only work when using Serverless API, Managed Compute, or Model .\ninference endpoint. Azure OpenAI endpoints don't support i.",
-        "parameters": [
-          {
-            "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter"
-          },
-          {
-            "name": "model",
-            "in": "query",
-            "description": "The model deployment name you want information from.",
-            "required": false,
-            "type": "string"
-          }
-        ],
-        "responses": {
-          "200": {
-            "description": "The request has succeeded.",
-            "schema": {
-              "$ref": "#/definitions/ModelInfo"
-            }
-          },
-          "default": {
-            "description": "An unexpected error response.",
-            "schema": {
-              "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse"
-            },
-            "headers": {
-              "x-ms-error-code": {
-                "type": "string",
-                "description": "String error code indicating what went wrong."
-              }
-            }
-          }
-        },
-        "x-ms-examples": {
-          "maximum set model information": {
-            "$ref": "./examples/GetModelInfo_MaximumSet_Gen.json"
-          },
-          "minimum set model information": {
-            "$ref": "./examples/GetModelInfo_MinimumSet_Gen.json"
-          }
-        }
-      }
-    }
-  },
-  "definitions": {
-    "AudioContentFormat": {
-      "type": "string",
-      "description": "A representation of the possible audio formats for audio.",
-      "enum": [
-        "wav",
-        "mp3"
-      ],
-      "x-ms-enum": {
-        "name": "AudioContentFormat",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "wav",
-            "value": "wav",
-            "description": "Specifies audio in WAV format."
-          },
-          {
-            "name": "mp3",
-            "value": "mp3",
-            "description": "Specifies audio in MP3 format."
-          }
-        ]
-      }
-    },
-    "Azure.Core.Foundations.Error": {
-      "type": "object",
-      "description": "The error object.",
-      "properties": {
-        "code": {
-          "type": "string",
-          "description": "One of a server-defined set of error codes."
-        },
-        "message": {
-          "type": "string",
-          "description": "A human-readable representation of the error."
-        },
-        "target": {
-          "type": "string",
-          "description": "The target of the error."
-        },
-        "details": {
-          "type": "array",
-          "description": "An array of details about specific errors that led to this reported error.",
-          "items": {
-            "$ref": "#/definitions/Azure.Core.Foundations.Error"
-          },
-          "x-ms-identifiers": []
-        },
-        "innererror": {
-          "$ref": "#/definitions/Azure.Core.Foundations.InnerError",
-          "description": "An object containing more specific information than the current object about the error."
-        }
-      },
-      "required": [
-        "code",
-        "message"
-      ]
-    },
-    "Azure.Core.Foundations.ErrorResponse": {
-      "type": "object",
-      "description": "A response containing error details.",
-      "properties": {
-        "error": {
-          "$ref": "#/definitions/Azure.Core.Foundations.Error",
-          "description": "The error object."
-        }
-      },
-      "required": [
-        "error"
-      ]
-    },
-    "Azure.Core.Foundations.InnerError": {
-      "type": "object",
-      "description": "An object containing more specific information about the error. As per Microsoft One API guidelines - https://github.com/microsoft/api-guidelines/blob/vNext/azure/Guidelines.md#handling-errors.",
-      "properties": {
-        "code": {
-          "type": "string",
-          "description": "One of a server-defined set of error codes."
-        },
-        "innererror": {
-          "$ref": "#/definitions/Azure.Core.Foundations.InnerError",
-          "description": "Inner error."
-        }
-      }
-    },
-    "ChatChoice": {
-      "type": "object",
-      "description": "The representation of a single prompt completion as part of an overall chat completions request.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.",
-      "properties": {
-        "index": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The ordered index associated with this chat completions choice."
-        },
-        "finish_reason": {
-          "$ref": "#/definitions/CompletionsFinishReason",
-          "description": "The reason that this chat completions choice completed its generated.",
-          "x-nullable": true,
-          "readOnly": true
-        },
-        "message": {
-          "$ref": "#/definitions/ChatResponseMessage",
-          "description": "The chat message for a given chat completions prompt.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "index",
-        "finish_reason",
-        "message"
-      ]
-    },
-    "ChatCompletions": {
-      "type": "object",
-      "description": "Representation of the response data from a chat completions request.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "A unique identifier associated with this chat completions response."
-        },
-        "object": {
-          "type": "string",
-          "description": "The response object type, which is always `chat.completion`.",
-          "enum": [
-            "chat.completion"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        },
-        "created": {
-          "type": "integer",
-          "format": "unixtime",
-          "description": "The first timestamp associated with generation activity for this completions response,\nrepresented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.",
-          "readOnly": true
-        },
-        "model": {
-          "type": "string",
-          "description": "The model used for the chat completion.",
-          "readOnly": true
-        },
-        "choices": {
-          "type": "array",
-          "description": "The collection of completions choices associated with this completions response.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.",
-          "minItems": 1,
-          "items": {
-            "$ref": "#/definitions/ChatChoice"
-          },
-          "readOnly": true,
-          "x-ms-identifiers": []
-        },
-        "usage": {
-          "$ref": "#/definitions/CompletionsUsage",
-          "description": "  Usage information for tokens processed and generated as part of this completions operation.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "id",
-        "object",
-        "created",
-        "model",
-        "choices",
-        "usage"
-      ]
-    },
-    "ChatCompletionsAudio": {
-      "type": "object",
-      "description": "A representation of the audio generated by the model.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "  Unique identifier for the audio response. This value can be used in chat history messages instead of passing \n  the full audio object.",
-          "readOnly": true
-        },
-        "expires_at": {
-          "type": "integer",
-          "format": "unixtime",
-          "description": "The Unix timestamp (in seconds) at which the audio piece expires and can't be any longer referenced by its ID in \nmulti-turn conversations.",
-          "readOnly": true,
-          "x-ms-client-name": "expiresAt"
-        },
-        "data": {
-          "type": "string",
-          "description": "Base64 encoded audio data",
-          "readOnly": true
-        },
-        "format": {
-          "$ref": "#/definitions/AudioContentFormat",
-          "description": "The format of the audio content. If format is not provided, it will match the format used in the\ninput audio request.",
-          "readOnly": true
-        },
-        "transcript": {
-          "type": "string",
-          "description": "The transcript of the audio file.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "id",
-        "expires_at",
-        "data",
-        "transcript"
-      ]
-    },
-    "ChatCompletionsModality": {
-      "type": "string",
-      "description": "The modalities that the model is allowed to use for the chat completions response.",
-      "enum": [
-        "text",
-        "audio"
-      ],
-      "x-ms-enum": {
-        "name": "ChatCompletionsModality",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "text",
-            "value": "text",
-            "description": "The model is only allowed to generate text."
-          },
-          {
-            "name": "audio",
-            "value": "audio",
-            "description": "The model is allowed to generate audio."
-          }
-        ]
-      }
-    },
-    "ChatCompletionsNamedToolChoice": {
-      "type": "object",
-      "description": "A tool selection of a specific, named function tool that will limit chat completions to using the named function.",
-      "properties": {
-        "type": {
-          "type": "string",
-          "description": "The type of the tool. Currently, only `function` is supported.",
-          "enum": [
-            "function"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        },
-        "function": {
-          "$ref": "#/definitions/ChatCompletionsNamedToolChoiceFunction",
-          "description": "The function that should be called."
-        }
-      },
-      "required": [
-        "type",
-        "function"
-      ]
-    },
-    "ChatCompletionsNamedToolChoiceFunction": {
-      "type": "object",
-      "description": "A tool selection of a specific, named function tool that will limit chat completions to using the named function.",
-      "properties": {
-        "name": {
-          "type": "string",
-          "description": "The name of the function that should be called."
-        }
-      },
-      "required": [
-        "name"
-      ]
-    },
-    "ChatCompletionsOptions": {
-      "type": "object",
-      "description": "The configuration information for a chat completions request.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.",
-      "properties": {
-        "messages": {
-          "type": "array",
-          "description": "The collection of context messages associated with this chat completions request.\nTypical usage begins with a chat message for the System role that provides instructions for\nthe behavior of the assistant, followed by alternating messages between the User and\nAssistant roles.",
-          "minItems": 1,
-          "items": {
-            "$ref": "#/definitions/ChatRequestMessage"
-          },
-          "x-ms-identifiers": []
-        },
-        "frequency_penalty": {
-          "type": "number",
-          "format": "float",
-          "description": "A value that influences the probability of generated tokens appearing based on their cumulative\nfrequency in generated text.\nPositive values will make tokens less likely to appear as their frequency increases and\ndecrease the likelihood of the model repeating the same statements verbatim.\nSupported range is [-2, 2].",
-          "default": 0,
-          "minimum": -2,
-          "maximum": 2
-        },
-        "stream": {
-          "type": "boolean",
-          "description": "A value indicating whether chat completions should be streamed for this request."
-        },
-        "presence_penalty": {
-          "type": "number",
-          "format": "float",
-          "description": "A value that influences the probability of generated tokens appearing based on their existing\npresence in generated text.\nPositive values will make tokens less likely to appear when they already exist and increase the\nmodel's likelihood to output new topics.\nSupported range is [-2, 2].",
-          "default": 0,
-          "minimum": -2,
-          "maximum": 2
-        },
-        "temperature": {
-          "type": "number",
-          "format": "float",
-          "description": "The sampling temperature to use that controls the apparent creativity of generated completions.\nHigher values will make output more random while lower values will make results more focused\nand deterministic.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].",
-          "default": 0.7,
-          "minimum": 0,
-          "maximum": 1
-        },
-        "top_p": {
-          "type": "number",
-          "format": "float",
-          "description": "An alternative to sampling with temperature called nucleus sampling. This value causes the\nmodel to consider the results of tokens with the provided probability mass. As an example, a\nvalue of 0.15 will cause only the tokens comprising the top 15% of probability mass to be\nconsidered.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].",
-          "default": 1,
-          "minimum": 0,
-          "maximum": 1
-        },
-        "max_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The maximum number of tokens to generate.",
-          "minimum": 0
-        },
-        "response_format": {
-          "$ref": "#/definitions/ChatCompletionsResponseFormat",
-          "description": "An object specifying the format that the model must output.\n\nSetting to `{ \"type\": \"json_schema\", \"json_schema\": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.\n\nSetting to `{ \"type\": \"json_object\" }` enables JSON mode, which ensures the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly \"stuck\" request. Also note that the message content may be partially cut off if `finish_reason=\"length\"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length."
-        },
-        "stop": {
-          "type": "array",
-          "description": "A collection of textual sequences that will end completions generation.",
-          "minItems": 1,
-          "items": {
-            "type": "string"
-          }
-        },
-        "tools": {
-          "type": "array",
-          "description": "A list of tools the model may request to call. Currently, only functions are supported as a tool. The model\nmay response with a function call request and provide the input arguments in JSON format for that function.",
-          "minItems": 1,
-          "items": {
-            "$ref": "#/definitions/ChatCompletionsToolDefinition"
-          },
-          "x-ms-identifiers": []
-        },
-        "tool_choice": {
-          "description": "If specified, the model will configure which of the provided tools it can use for the chat completions response.",
-          "x-ms-client-name": "toolChoice"
-        },
-        "seed": {
-          "type": "integer",
-          "format": "int64",
-          "description": "If specified, the system will make a best effort to sample deterministically such that repeated requests with the\nsame seed and parameters should return the same result. Determinism is not guaranteed."
-        },
-        "model": {
-          "type": "string",
-          "description": "ID of the specific AI model to use, if more than one model is available on the endpoint."
-        },
-        "modalities": {
-          "type": "array",
-          "description": "The modalities that the model is allowed to use for the chat completions response. The default modality\nis `text`. Indicating an unsupported modality combination results in an 422 error.",
-          "items": {
-            "$ref": "#/definitions/ChatCompletionsModality"
-          }
-        }
-      },
-      "required": [
-        "messages"
-      ],
-      "additionalProperties": {}
-    },
-    "ChatCompletionsResponseFormat": {
-      "type": "object",
-      "description": "Represents the format that the model must output. Use this to enable JSON mode instead of the default text mode.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message.",
-      "properties": {
-        "type": {
-          "type": "string",
-          "description": "The response format type to use for chat completions."
-        }
-      },
-      "discriminator": "type",
-      "required": [
-        "type"
-      ]
-    },
-    "ChatCompletionsResponseFormatJsonObject": {
-      "type": "object",
-      "description": "A response format for Chat Completions that restricts responses to emitting valid JSON objects.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message.",
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatCompletionsResponseFormat"
-        }
-      ],
-      "x-ms-discriminator-value": "json_object"
-    },
-    "ChatCompletionsResponseFormatJsonSchema": {
-      "type": "object",
-      "description": "A response format for Chat Completions that restricts responses to emitting valid JSON objects, with a\nJSON schema specified by the caller.",
-      "properties": {
-        "json_schema": {
-          "$ref": "#/definitions/ChatCompletionsResponseFormatJsonSchemaDefinition",
-          "description": "The definition of the required JSON schema in the response, and associated metadata."
-        }
-      },
-      "required": [
-        "json_schema"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatCompletionsResponseFormat"
-        }
-      ],
-      "x-ms-discriminator-value": "json_schema"
-    },
-    "ChatCompletionsResponseFormatJsonSchemaDefinition": {
-      "type": "object",
-      "description": "The definition of the required JSON schema in the response, and associated metadata.",
-      "properties": {
-        "name": {
-          "type": "string",
-          "description": "The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64."
-        },
-        "schema": {
-          "type": "object",
-          "description": "The definition of the JSON schema",
-          "additionalProperties": {}
-        },
-        "description": {
-          "type": "string",
-          "description": "A description of the response format, used by the AI model to determine how to generate responses in this format."
-        },
-        "strict": {
-          "type": "boolean",
-          "description": "Whether to enable strict schema adherence when generating the output.\nIf set to true, the model will always follow the exact schema defined in the `schema` field. Only a subset of\nJSON Schema is supported when `strict` is `true`.",
-          "default": false
-        }
-      },
-      "required": [
-        "name",
-        "schema"
-      ]
-    },
-    "ChatCompletionsResponseFormatText": {
-      "type": "object",
-      "description": "A response format for Chat Completions that emits text responses. This is the default response format.",
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatCompletionsResponseFormat"
-        }
-      ],
-      "x-ms-discriminator-value": "text"
-    },
-    "ChatCompletionsToolCall": {
-      "type": "object",
-      "description": "A function tool call requested by the AI model.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "The ID of the tool call."
-        },
-        "type": {
-          "type": "string",
-          "description": "The type of tool call. Currently, only `function` is supported.",
-          "enum": [
-            "function"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        },
-        "function": {
-          "$ref": "#/definitions/FunctionCall",
-          "description": "The details of the function call requested by the AI model."
-        }
-      },
-      "required": [
-        "id",
-        "type",
-        "function"
-      ]
-    },
-    "ChatCompletionsToolChoicePreset": {
-      "type": "string",
-      "description": "Represents a generic policy for how a chat completions tool may be selected.",
-      "enum": [
-        "auto",
-        "none",
-        "required"
-      ],
-      "x-ms-enum": {
-        "name": "ChatCompletionsToolChoicePreset",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "auto",
-            "value": "auto",
-            "description": "Specifies that the model may either use any of the tools provided in this chat completions request or\ninstead return a standard chat completions response as if no tools were provided."
-          },
-          {
-            "name": "none",
-            "value": "none",
-            "description": "Specifies that the model should not respond with a tool call and should instead provide a standard chat\ncompletions response. Response content may still be influenced by the provided tool definitions."
-          },
-          {
-            "name": "required",
-            "value": "required",
-            "description": "Specifies that the model should respond with a call to one or more tools."
-          }
-        ]
-      }
-    },
-    "ChatCompletionsToolDefinition": {
-      "type": "object",
-      "description": "The definition of a chat completions tool that can call a function.",
-      "properties": {
-        "type": {
-          "type": "string",
-          "description": "The type of the tool. Currently, only `function` is supported.",
-          "enum": [
-            "function"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        },
-        "function": {
-          "$ref": "#/definitions/FunctionDefinition",
-          "description": "The function definition details for the function tool."
-        }
-      },
-      "required": [
-        "type",
-        "function"
-      ]
-    },
-    "ChatMessageAudioContentItem": {
-      "type": "object",
-      "description": "A structured chat content item containing an audio reference.",
-      "properties": {
-        "audio_url": {
-          "$ref": "#/definitions/ChatMessageAudioUrl",
-          "description": "An internet location, which must be accessible to the model, from which the audio may be retrieved.",
-          "x-ms-client-name": "audioUrl"
-        }
-      },
-      "required": [
-        "audio_url"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatMessageContentItem"
-        }
-      ],
-      "x-ms-discriminator-value": "audio_url"
-    },
-    "ChatMessageAudioUrl": {
-      "type": "object",
-      "description": "An internet location from which the model may retrieve an audio.",
-      "properties": {
-        "url": {
-          "type": "string",
-          "description": "The URL of the audio."
-        }
-      },
-      "required": [
-        "url"
-      ]
-    },
-    "ChatMessageContentItem": {
-      "type": "object",
-      "description": "An abstract representation of a structured content item within a chat message.",
-      "properties": {
-        "type": {
-          "type": "string",
-          "description": "The discriminated object type."
-        }
-      },
-      "discriminator": "type",
-      "required": [
-        "type"
-      ]
-    },
-    "ChatMessageImageContentItem": {
-      "type": "object",
-      "description": "A structured chat content item containing an image reference.",
-      "properties": {
-        "image_url": {
-          "$ref": "#/definitions/ChatMessageImageUrl",
-          "description": "An internet location, which must be accessible to the model,from which the image may be retrieved.",
-          "x-ms-client-name": "imageUrl"
-        }
-      },
-      "required": [
-        "image_url"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatMessageContentItem"
-        }
-      ],
-      "x-ms-discriminator-value": "image_url"
-    },
-    "ChatMessageImageDetailLevel": {
-      "type": "string",
-      "description": "A representation of the possible image detail levels for image-based chat completions message content.",
-      "enum": [
-        "auto",
-        "low",
-        "high"
-      ],
-      "x-ms-enum": {
-        "name": "ChatMessageImageDetailLevel",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "auto",
-            "value": "auto",
-            "description": "Specifies that the model should determine which detail level to apply using heuristics like image size."
-          },
-          {
-            "name": "low",
-            "value": "low",
-            "description": "Specifies that image evaluation should be constrained to the 'low-res' model that may be faster and consume fewer\ntokens but may also be less accurate for highly detailed images."
-          },
-          {
-            "name": "high",
-            "value": "high",
-            "description": "Specifies that image evaluation should enable the 'high-res' model that may be more accurate for highly detailed\nimages but may also be slower and consume more tokens."
-          }
-        ]
-      }
-    },
-    "ChatMessageImageUrl": {
-      "type": "object",
-      "description": "An internet location from which the model may retrieve an image.",
-      "properties": {
-        "url": {
-          "type": "string",
-          "description": "The URL of the image."
-        },
-        "detail": {
-          "$ref": "#/definitions/ChatMessageImageDetailLevel",
-          "description": "The evaluation quality setting to use, which controls relative prioritization of speed, token consumption, and\naccuracy."
-        }
-      },
-      "required": [
-        "url"
-      ]
-    },
-    "ChatMessageInputAudio": {
-      "type": "object",
-      "description": "The details of an audio chat message content part.",
-      "properties": {
-        "data": {
-          "type": "string",
-          "description": "Base64 encoded audio data"
-        },
-        "format": {
-          "$ref": "#/definitions/AudioContentFormat",
-          "description": "The audio format of the audio content."
-        }
-      },
-      "required": [
-        "data",
-        "format"
-      ]
-    },
-    "ChatMessageInputAudioContentItem": {
-      "type": "object",
-      "description": "A structured chat content item containing an audio content.",
-      "properties": {
-        "format": {
-          "$ref": "#/definitions/AudioContentFormat",
-          "description": "The audio format of the audio reference."
-        }
-      },
-      "required": [
-        "format"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatMessageContentItem"
-        }
-      ],
-      "x-ms-discriminator-value": "input_audio"
-    },
-    "ChatMessageTextContentItem": {
-      "type": "object",
-      "description": "A structured chat content item containing plain text.",
-      "properties": {
-        "text": {
-          "type": "string",
-          "description": "The content of the message."
-        }
-      },
-      "required": [
-        "text"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatMessageContentItem"
-        }
-      ],
-      "x-ms-discriminator-value": "text"
-    },
-    "ChatRequestAssistantMessage": {
-      "type": "object",
-      "description": "A request chat message representing response or action from the assistant.",
-      "properties": {
-        "content": {
-          "type": "string",
-          "description": "The content of the message."
-        },
-        "tool_calls": {
-          "type": "array",
-          "description": "The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat\ncompletions request to resolve as configured.",
-          "items": {
-            "$ref": "#/definitions/ChatCompletionsToolCall"
-          },
-          "x-ms-client-name": "toolCalls"
-        },
-        "audio": {
-          "$ref": "#/definitions/ChatRequestAudioReference",
-          "description": "  The audio generated by a previous response in a multi-turn conversation."
-        }
-      },
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatRequestMessage"
-        }
-      ],
-      "x-ms-discriminator-value": "assistant"
-    },
-    "ChatRequestAudioReference": {
-      "type": "object",
-      "description": "A reference to an audio response generated by the model.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "  Unique identifier for the audio response. This value corresponds to the id of a previous audio completion."
-        }
-      },
-      "required": [
-        "id"
-      ]
-    },
-    "ChatRequestMessage": {
-      "type": "object",
-      "description": "An abstract representation of a chat message as provided in a request.",
-      "properties": {
-        "role": {
-          "$ref": "#/definitions/ChatRole",
-          "description": "The chat role associated with this message."
-        }
-      },
-      "discriminator": "role",
-      "required": [
-        "role"
-      ]
-    },
-    "ChatRequestSystemMessage": {
-      "type": "object",
-      "description": "A request chat message containing system instructions that influence how the model will generate a chat completions\nresponse.",
-      "properties": {
-        "content": {
-          "type": "string",
-          "description": "The contents of the system message."
-        }
-      },
-      "required": [
-        "content"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatRequestMessage"
-        }
-      ],
-      "x-ms-discriminator-value": "system"
-    },
-    "ChatRequestToolMessage": {
-      "type": "object",
-      "description": "A request chat message representing requested output from a configured tool.",
-      "properties": {
-        "content": {
-          "type": "string",
-          "description": "The content of the message."
-        },
-        "tool_call_id": {
-          "type": "string",
-          "description": "The ID of the tool call resolved by the provided content.",
-          "x-ms-client-name": "toolCallId"
-        }
-      },
-      "required": [
-        "tool_call_id"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatRequestMessage"
-        }
-      ],
-      "x-ms-discriminator-value": "tool"
-    },
-    "ChatRequestUserMessage": {
-      "type": "object",
-      "description": "A request chat message representing user input to the assistant.",
-      "properties": {
-        "content": {
-          "description": "The contents of the user message, with available input types varying by selected model."
-        }
-      },
-      "required": [
-        "content"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatRequestMessage"
-        }
-      ],
-      "x-ms-discriminator-value": "user"
-    },
-    "ChatResponseMessage": {
-      "type": "object",
-      "description": "A representation of a chat message as received in a response.",
-      "properties": {
-        "role": {
-          "$ref": "#/definitions/ChatRole",
-          "description": "The chat role associated with the message.",
-          "readOnly": true
-        },
-        "content": {
-          "type": "string",
-          "description": "The content of the message.",
-          "x-nullable": true,
-          "readOnly": true
-        },
-        "reasoning_content": {
-          "type": "string",
-          "description": "The reasoning content the model used for generating the response",
-          "readOnly": true
-        },
-        "tool_calls": {
-          "type": "array",
-          "description": "The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat\ncompletions request to resolve as configured.",
-          "items": {
-            "$ref": "#/definitions/ChatCompletionsToolCall"
-          },
-          "readOnly": true,
-          "x-ms-client-name": "toolCalls"
-        },
-        "audio": {
-          "$ref": "#/definitions/ChatCompletionsAudio",
-          "description": "  The audio generated by the model as a response to the messages if the model is configured to generate audio.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "role",
-        "content"
-      ]
-    },
-    "ChatRole": {
-      "type": "string",
-      "description": "A description of the intended purpose of a message within a chat completions interaction.",
-      "enum": [
-        "system",
-        "developer",
-        "user",
-        "assistant",
-        "tool"
-      ],
-      "x-ms-enum": {
-        "name": "ChatRole",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "system",
-            "value": "system",
-            "description": "The role that instructs or sets the behavior of the assistant."
-          },
-          {
-            "name": "developer",
-            "value": "developer",
-            "description": "The role that provides instructions to the model prioritized ahead of user messages."
-          },
-          {
-            "name": "user",
-            "value": "user",
-            "description": "The role that provides input for chat completions."
-          },
-          {
-            "name": "assistant",
-            "value": "assistant",
-            "description": "The role that provides responses to system-instructed, user-prompted input."
-          },
-          {
-            "name": "tool",
-            "value": "tool",
-            "description": "The role that represents extension tool activity within a chat completions operation."
-          }
-        ]
-      }
-    },
-    "CompletionsFinishReason": {
-      "type": "string",
-      "description": "Representation of the manner in which a completions response concluded.",
-      "enum": [
-        "stop",
-        "length",
-        "content_filter",
-        "tool_calls"
-      ],
-      "x-ms-enum": {
-        "name": "CompletionsFinishReason",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "stopped",
-            "value": "stop",
-            "description": "Completions ended normally and reached its end of token generation."
-          },
-          {
-            "name": "tokenLimitReached",
-            "value": "length",
-            "description": "Completions exhausted available token limits before generation could complete."
-          },
-          {
-            "name": "contentFiltered",
-            "value": "content_filter",
-            "description": "Completions generated a response that was identified as potentially sensitive per content\nmoderation policies."
-          },
-          {
-            "name": "toolCalls",
-            "value": "tool_calls",
-            "description": "Completion ended with the model calling a provided tool for output."
-          }
-        ]
-      }
-    },
-    "CompletionsUsage": {
-      "type": "object",
-      "description": "Representation of the token counts processed for a completions request.\nCounts consider all tokens across prompts, choices, choice alternates, best_of generations, and\nother consumers.",
-      "properties": {
-        "completion_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The number of tokens generated across all completions emissions.",
-          "readOnly": true
-        },
-        "prompt_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The number of tokens in the provided prompts for the completions request.",
-          "readOnly": true
-        },
-        "total_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The total number of tokens processed for the completions request and response.",
-          "readOnly": true
-        },
-        "completion_tokens_details": {
-          "$ref": "#/definitions/CompletionsUsageDetails",
-          "description": "Breakdown of tokens used in a completion.",
-          "readOnly": true
-        },
-        "prompt_tokens_details": {
-          "$ref": "#/definitions/PromptUsageDetails",
-          "description": "Breakdown of tokens used in the prompt/chat history.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "completion_tokens",
-        "prompt_tokens",
-        "total_tokens"
-      ]
-    },
-    "CompletionsUsageDetails": {
-      "type": "object",
-      "description": "A breakdown of tokens used in a completion.",
-      "properties": {
-        "audio_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The number of tokens corresponding to audio input.",
-          "readOnly": true
-        },
-        "reasoning_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The number of tokens corresponding to reasoning.",
-          "readOnly": true
-        },
-        "total_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The total number of tokens processed for the completions request and response.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "audio_tokens",
-        "reasoning_tokens",
-        "total_tokens"
-      ]
-    },
-    "EmbeddingEncodingFormat": {
-      "type": "string",
-      "description": "Specifies the types of embeddings to generate. Compressed embeddings types like `uint8`, `int8`, `ubinary` and \n`binary`, may reduce storage costs without sacrificing the integrity of the data. Returns a 422 error if the\nmodel doesn't support the value or parameter. Read the model's documentation to know the values supported by\nthe your model.",
-      "enum": [
-        "base64",
-        "binary",
-        "float",
-        "int8",
-        "ubinary",
-        "uint8"
-      ],
-      "x-ms-enum": {
-        "name": "EmbeddingEncodingFormat",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "base64",
-            "value": "base64",
-            "description": "Get back binary representation of the embeddings encoded as Base64 string. OpenAI Python library retrieves \nembeddings from the API as encoded binary data, rather than using intermediate decimal representations as is \nusually done."
-          },
-          {
-            "name": "binary",
-            "value": "binary",
-            "description": "Get back signed binary embeddings"
-          },
-          {
-            "name": "float",
-            "value": "float",
-            "description": "Get back full precision embeddings"
-          },
-          {
-            "name": "int8",
-            "value": "int8",
-            "description": "Get back signed int8 embeddings"
-          },
-          {
-            "name": "ubinary",
-            "value": "ubinary",
-            "description": "Get back unsigned binary embeddings"
-          },
-          {
-            "name": "uint8",
-            "value": "uint8",
-            "description": "Get back unsigned int8 embeddings"
-          }
-        ]
-      }
-    },
-    "EmbeddingInputType": {
-      "type": "string",
-      "description": "Represents the input types used for embedding search.",
-      "enum": [
-        "text",
-        "query",
-        "document"
-      ],
-      "x-ms-enum": {
-        "name": "EmbeddingInputType",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "text",
-            "value": "text",
-            "description": "Indicates the input is a general text input."
-          },
-          {
-            "name": "query",
-            "value": "query",
-            "description": "Indicates the input represents a search query to find the most relevant documents in your vector database."
-          },
-          {
-            "name": "document",
-            "value": "document",
-            "description": "Indicates the input represents a document that is stored in a vector database."
-          }
-        ]
-      }
-    },
-    "EmbeddingItem": {
-      "type": "object",
-      "description": "Representation of a single embeddings relatedness comparison.",
-      "properties": {
-        "embedding": {
-          "type": "array",
-          "description": "List of embedding values for the input prompt. These represent a measurement of the\nvector-based relatedness of the provided input. Or a base64 encoded string of the embedding vector.",
-          "items": {
-            "type": "number",
-            "format": "float"
-          },
-          "readOnly": true
-        },
-        "index": {
-          "type": "integer",
-          "format": "int32",
-          "description": "Index of the prompt to which the EmbeddingItem corresponds.",
-          "readOnly": true
-        },
-        "object": {
-          "type": "string",
-          "description": "The object type of this embeddings item. Will always be `embedding`.",
-          "enum": [
-            "embedding"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        }
-      },
-      "required": [
-        "embedding",
-        "index",
-        "object"
-      ]
-    },
-    "EmbeddingsOptions": {
-      "type": "object",
-      "description": "The configuration information for an embeddings request.",
-      "properties": {
-        "input": {
-          "type": "array",
-          "description": "Input text to embed, encoded as a string or array of tokens.\nTo embed multiple inputs in a single request, pass an array\nof strings or array of token arrays.",
-          "items": {
-            "type": "string"
-          }
-        },
-        "dimensions": {
-          "type": "integer",
-          "format": "int32",
-          "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter."
-        },
-        "encoding_format": {
-          "$ref": "#/definitions/EmbeddingEncodingFormat",
-          "description": "Optional. The desired format for the returned embeddings."
-        },
-        "input_type": {
-          "$ref": "#/definitions/EmbeddingInputType",
-          "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter."
-        },
-        "model": {
-          "type": "string",
-          "description": "ID of the specific AI model to use, if more than one model is available on the endpoint."
-        }
-      },
-      "required": [
-        "input"
-      ],
-      "additionalProperties": {}
-    },
-    "EmbeddingsResult": {
-      "type": "object",
-      "description": "Representation of the response data from an embeddings request.\nEmbeddings measure the relatedness of text strings and are commonly used for search, clustering,\nrecommendations, and other similar scenarios.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "Unique identifier for the embeddings result.",
-          "readOnly": true
-        },
-        "data": {
-          "type": "array",
-          "description": "Embedding values for the prompts submitted in the request.",
-          "items": {
-            "$ref": "#/definitions/EmbeddingItem"
-          },
-          "readOnly": true,
-          "x-ms-identifiers": []
-        },
-        "usage": {
-          "$ref": "#/definitions/EmbeddingsUsage",
-          "description": "Usage counts for tokens input using the embeddings API.",
-          "readOnly": true
-        },
-        "object": {
-          "type": "string",
-          "description": "The object type of the embeddings result. Will always be `list`.",
-          "enum": [
-            "list"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        },
-        "model": {
-          "type": "string",
-          "description": "The model ID used to generate this result.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "id",
-        "data",
-        "usage",
-        "object",
-        "model"
-      ]
-    },
-    "EmbeddingsUsage": {
-      "type": "object",
-      "description": "Measurement of the amount of tokens used in this request and response.",
-      "properties": {
-        "prompt_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "Number of tokens in the request.",
-          "readOnly": true
-        },
-        "total_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "Total number of tokens transacted in this request/response. Should equal the\nnumber of tokens in the request.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "prompt_tokens",
-        "total_tokens"
-      ]
-    },
-    "ExtraParameters": {
-      "type": "string",
-      "description": "Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload.",
-      "enum": [
-        "error",
-        "drop",
-        "pass-through"
-      ],
-      "x-ms-enum": {
-        "name": "ExtraParameters",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "error",
-            "value": "error",
-            "description": "The service will error if it detected extra parameters in the request payload. This is the service default."
-          },
-          {
-            "name": "drop",
-            "value": "drop",
-            "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model."
-          },
-          {
-            "name": "pass_through",
-            "value": "pass-through",
-            "description": "The service will pass extra parameters to the back-end AI model."
-          }
-        ]
-      }
-    },
-    "FunctionCall": {
-      "type": "object",
-      "description": "The name and arguments of a function that should be called, as generated by the model.",
-      "properties": {
-        "name": {
-          "type": "string",
-          "description": "The name of the function to call.",
-          "readOnly": true
-        },
-        "arguments": {
-          "type": "string",
-          "description": "The arguments to call the function with, as generated by the model in JSON format.\nNote that the model does not always generate valid JSON, and may hallucinate parameters\nnot defined by your function schema. Validate the arguments in your code before calling\nyour function.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "name",
-        "arguments"
-      ]
-    },
-    "FunctionDefinition": {
-      "type": "object",
-      "description": "The definition of a caller-specified function that chat completions may invoke in response to matching user input.",
-      "properties": {
-        "name": {
-          "type": "string",
-          "description": "The name of the function to be called."
-        },
-        "description": {
-          "type": "string",
-          "description": "A description of what the function does. The model will use this description when selecting the function and\ninterpreting its parameters."
-        },
-        "parameters": {
-          "type": "object",
-          "description": "The parameters the function accepts, described as a JSON Schema object.",
-          "additionalProperties": {}
-        }
-      },
-      "required": [
-        "name"
-      ]
-    },
-    "ImageEmbeddingInput": {
-      "type": "object",
-      "description": "Represents an image with optional text.",
-      "properties": {
-        "image": {
-          "type": "string",
-          "description": "The input image encoded in base64 string as a data URL. Example: `data:image/{format};base64,{data}`."
-        },
-        "text": {
-          "type": "string",
-          "description": "Optional. The text input to feed into the model (like DINO, CLIP).\nReturns a 422 error if the model doesn't support the value or parameter."
-        }
-      },
-      "required": [
-        "image"
-      ]
-    },
-    "ImageEmbeddingsOptions": {
-      "type": "object",
-      "description": "The configuration information for an image embeddings request.",
-      "properties": {
-        "input": {
-          "type": "array",
-          "description": "Input image to embed. To embed multiple inputs in a single request, pass an array.\nThe input must not exceed the max input tokens for the model.",
-          "items": {
-            "$ref": "#/definitions/ImageEmbeddingInput"
-          },
-          "x-ms-identifiers": []
-        },
-        "dimensions": {
-          "type": "integer",
-          "format": "int32",
-          "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter."
-        },
-        "encoding_format": {
-          "$ref": "#/definitions/EmbeddingEncodingFormat",
-          "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter."
-        },
-        "input_type": {
-          "$ref": "#/definitions/EmbeddingInputType",
-          "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter."
-        },
-        "model": {
-          "type": "string",
-          "description": "ID of the specific AI model to use, if more than one model is available on the endpoint."
-        }
-      },
-      "required": [
-        "input"
-      ],
-      "additionalProperties": {}
-    },
-    "ModelInfo": {
-      "type": "object",
-      "description": "Represents some basic information about the AI model.",
-      "properties": {
-        "model_name": {
-          "type": "string",
-          "description": "The name of the AI model. For example: `Phi21`",
-          "readOnly": true
-        },
-        "model_type": {
-          "$ref": "#/definitions/ModelType",
-          "description": "The type of the AI model. A Unique identifier for the profile.",
-          "readOnly": true
-        },
-        "model_provider_name": {
-          "type": "string",
-          "description": "The model provider name. For example: `Microsoft`",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "model_name",
-        "model_type",
-        "model_provider_name"
-      ]
-    },
-    "ModelType": {
-      "type": "string",
-      "description": "The type of AI model",
-      "enum": [
-        "embeddings",
-        "chat-completion"
-      ],
-      "x-ms-enum": {
-        "name": "ModelType",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "embeddings",
-            "value": "embeddings",
-            "description": "A model capable of generating embeddings from a text"
-          },
-          {
-            "name": "chat_completion",
-            "value": "chat-completion",
-            "description": "A model capable of taking chat-formatted messages and generate responses"
-          }
-        ]
-      }
-    },
-    "PromptUsageDetails": {
-      "type": "object",
-      "description": "A breakdown of tokens used in the prompt/chat history.",
-      "properties": {
-        "audio_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The number of tokens corresponding to audio input.",
-          "readOnly": true
-        },
-        "cached_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The total number of tokens cached.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "audio_tokens",
-        "cached_tokens"
-      ]
-    },
-    "StreamingChatChoiceUpdate": {
-      "type": "object",
-      "description": "Represents an update to a single prompt completion when the service is streaming updates \nusing Server Sent Events (SSE).\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.",
-      "properties": {
-        "index": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The ordered index associated with this chat completions choice."
-        },
-        "finish_reason": {
-          "$ref": "#/definitions/CompletionsFinishReason",
-          "description": "The reason that this chat completions choice completed its generated.",
-          "x-nullable": true,
-          "readOnly": true
-        },
-        "delta": {
-          "$ref": "#/definitions/StreamingChatResponseMessageUpdate",
-          "description": "An update to the chat message for a given chat completions prompt.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "index",
-        "finish_reason",
-        "delta"
-      ]
-    },
-    "StreamingChatCompletionsUpdate": {
-      "type": "object",
-      "description": "Represents a response update to a chat completions request, when the service is streaming updates \nusing Server Sent Events (SSE).\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "A unique identifier associated with this chat completions response."
-        },
-        "object": {
-          "type": "string",
-          "description": "The response object type, which is always `chat.completion`.",
-          "enum": [
-            "chat.completion"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        },
-        "created": {
-          "type": "integer",
-          "format": "unixtime",
-          "description": "The first timestamp associated with generation activity for this completions response,\nrepresented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.",
-          "readOnly": true
-        },
-        "model": {
-          "type": "string",
-          "description": "The model used for the chat completion.",
-          "readOnly": true
-        },
-        "choices": {
-          "type": "array",
-          "description": "An update to the collection of completion choices associated with this completions response.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.",
-          "minItems": 1,
-          "items": {
-            "$ref": "#/definitions/StreamingChatChoiceUpdate"
-          },
-          "readOnly": true,
-          "x-ms-identifiers": []
-        },
-        "usage": {
-          "$ref": "#/definitions/CompletionsUsage",
-          "description": "Usage information for tokens processed and generated as part of this completions operation.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "id",
-        "object",
-        "created",
-        "model",
-        "choices"
-      ]
-    },
-    "StreamingChatResponseMessageUpdate": {
-      "type": "object",
-      "description": "A representation of a chat message update as received in a streaming response.",
-      "properties": {
-        "role": {
-          "$ref": "#/definitions/ChatRole",
-          "description": "The chat role associated with the message. If present, should always be 'assistant'",
-          "readOnly": true
-        },
-        "content": {
-          "type": "string",
-          "description": "The content of the message.",
-          "readOnly": true
-        },
-        "reasoning_content": {
-          "type": "string",
-          "description": "The reasoning content the model used for generating the response",
-          "readOnly": true
-        },
-        "tool_calls": {
-          "type": "array",
-          "description": "The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat\ncompletions request to resolve as configured.",
-          "items": {
-            "$ref": "#/definitions/StreamingChatResponseToolCallUpdate"
-          },
-          "readOnly": true,
-          "x-ms-client-name": "toolCalls"
-        }
-      }
-    },
-    "StreamingChatResponseToolCallUpdate": {
-      "type": "object",
-      "description": "An update to the function tool call information requested by the AI model.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "The ID of the tool call.",
-          "readOnly": true
-        },
-        "function": {
-          "$ref": "#/definitions/FunctionCall",
-          "description": "Updates to the function call requested by the AI model.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "id",
-        "function"
-      ]
-    }
-  },
-  "parameters": {
-    "Azure.Core.Foundations.ApiVersionParameter": {
-      "name": "api-version",
-      "in": "query",
-      "description": "The API version to use for this operation.",
-      "required": true,
-      "type": "string",
-      "minLength": 1,
-      "x-ms-parameter-location": "method",
-      "x-ms-client-name": "apiVersion"
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetChatCompletions_AudioModality_Gen.json b/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetChatCompletions_AudioModality_Gen.json
deleted file mode 100644
index 5434af06078d..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetChatCompletions_AudioModality_Gen.json
+++ /dev/null
@@ -1,99 +0,0 @@
-{
-  "title": "Chat completion with audio content",
-  "operationId": "GetChatCompletions",
-  "parameters": {
-    "api-version": "2025-05-01",
-    "extra-parameters": "error",
-    "body": {
-      "modalities": [
-        "text",
-        "audio"
-      ],
-      "messages": [
-        {
-          "role": "system",
-          "content": "You are a helpful assistant"
-        },
-        {
-          "role": "user",
-          "content": [
-            {
-              "type": "input_audio",
-              "input_audio": {
-                "data": "<base64 encoded audio data>",
-                "format": "wav"
-              }
-            }
-          ]
-        },
-        {
-          "role": "assistant",
-          "content": null,
-          "audio": {
-            "id": "abcdef1234"
-          }
-        },
-        {
-          "role": "user",
-          "content": [
-            {
-              "type": "input_audio",
-              "input_audio": {
-                "data": "<base64 encoded audio data>",
-                "format": "wav"
-              }
-            }
-          ]
-        }
-      ],
-      "frequency_penalty": 0,
-      "presence_penalty": 0,
-      "temperature": 0,
-      "top_p": 0,
-      "seed": 21,
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "kgousajxgzyhugvqekuswuqbk",
-        "object": "chat.completion",
-        "created": 1696522361,
-        "model": "my-model-name",
-        "usage": {
-          "completion_tokens": 19,
-          "prompt_tokens": 28,
-          "total_tokens": 16,
-          "completion_tokens_details": {
-            "audio_tokens": 5,
-            "reasoning_tokens": 0,
-            "total_tokens": 5
-          },
-          "prompt_tokens_details": {
-            "audio_tokens": 10,
-            "cached_tokens": 0
-          }
-        },
-        "choices": [
-          {
-            "index": 0,
-            "finish_reason": "stop",
-            "message": {
-              "role": "assistant",
-              "content": null,
-              "tool_calls": null,
-              "audio": {
-                "id": "abcdef1234",
-                "format": "wav",
-                "data": "<base64 encoded audio data>",
-                "expires_at": 1896522361,
-                "transcript": "This is a sample transcript"
-              }
-            }
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetChatCompletions_MaximumSet_Gen.json b/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetChatCompletions_MaximumSet_Gen.json
deleted file mode 100644
index 4d154c3a9c2c..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetChatCompletions_MaximumSet_Gen.json
+++ /dev/null
@@ -1,95 +0,0 @@
-{
-  "title": "Chat completion with multiple parameters and chat history",
-  "operationId": "GetChatCompletions",
-  "parameters": {
-    "api-version": "2025-05-01",
-    "extra-parameters": "error",
-    "body": {
-      "modalities": [
-        "text"
-      ],
-      "messages": [
-        {
-          "role": "system",
-          "content": "You are a helpful assistant"
-        },
-        {
-          "role": "user",
-          "content": "Explain Riemann's conjecture"
-        },
-        {
-          "role": "assistant",
-          "content": "The Riemann Conjecture is a deep mathematical conjecture around prime numbers and how they can be predicted. It was first published in Riemann's groundbreaking 1859 paper. The conjecture states that the Riemann zeta function has its zeros only at the negative even integers and complex numbers with real part 1/21. Many consider it to be the most important unsolved problem in pure mathematics. The Riemann hypothesis is a way to predict the probability that numbers in a certain range are prime that was also devised by German mathematician Bernhard Riemann in 18594."
-        },
-        {
-          "role": "user",
-          "content": "Ist it proved?"
-        }
-      ],
-      "frequency_penalty": 0,
-      "stream": true,
-      "presence_penalty": 0,
-      "temperature": 0,
-      "top_p": 0,
-      "max_tokens": 255,
-      "response_format": {
-        "type": "text"
-      },
-      "stop": [
-        "<|endoftext|>"
-      ],
-      "tools": [
-        {
-          "type": "function",
-          "function": {
-            "name": "my-function-name",
-            "description": "A function useful to know if a theroem is proved or not"
-          }
-        }
-      ],
-      "seed": 21,
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "kgousajxgzyhugvqekuswuqbk",
-        "object": "chat.completion",
-        "created": 18,
-        "model": "my-model-name",
-        "usage": {
-          "completion_tokens": 19,
-          "prompt_tokens": 28,
-          "total_tokens": 16,
-          "completion_tokens_details": {
-            "audio_tokens": 5,
-            "reasoning_tokens": 0,
-            "total_tokens": 5
-          }
-        },
-        "choices": [
-          {
-            "index": 7,
-            "finish_reason": "stop",
-            "message": {
-              "role": "assistant",
-              "content": null,
-              "reasoning_content": null,
-              "tool_calls": [
-                {
-                  "id": "yrobmilsrugmbwukmzo",
-                  "type": "function",
-                  "function": {
-                    "name": "my-function-name",
-                    "arguments": "{ \"arg1\": \"value1\", \"arg2\": \"value2\" }"
-                  }
-                }
-              ]
-            }
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetChatCompletions_MinimumSet_Gen.json b/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetChatCompletions_MinimumSet_Gen.json
deleted file mode 100644
index 9e35a49d9c96..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetChatCompletions_MinimumSet_Gen.json
+++ /dev/null
@@ -1,40 +0,0 @@
-{
-  "title": "Simple chat completion",
-  "operationId": "GetChatCompletions",
-  "parameters": {
-    "api-version": "2025-05-01",
-    "body": {
-      "messages": [
-        {
-          "role": "user",
-          "content": "Explain Riemann's conjecture"
-        }
-      ]
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "kgousajxgzyhugvqekuswuqbk",
-        "object": "chat.completion",
-        "created": 1234567890,
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 205,
-          "completion_tokens": 5,
-          "total_tokens": 210
-        },
-        "choices": [
-          {
-            "index": 0,
-            "finish_reason": "stop",
-            "message": {
-              "role": "assistant",
-              "content": "The Riemann Conjecture is a deep mathematical conjecture around prime numbers and how they can be predicted. It was first published in Riemann's groundbreaking 1859 paper. The conjecture states that the Riemann zeta function has its zeros only at the negative even integers and complex numbers with real part 1/21. Many consider it to be the most important unsolved problem in pure mathematics. The Riemann hypothesis is a way to predict the probability that numbers in a certain range are prime that was also devised by German mathematician Bernhard Riemann in 18594"
-            }
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetEmbeddings_MaximumSet_Gen.json b/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetEmbeddings_MaximumSet_Gen.json
deleted file mode 100644
index 9247e0436dc6..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetEmbeddings_MaximumSet_Gen.json
+++ /dev/null
@@ -1,50 +0,0 @@
-{
-  "title": "Create text embeddings with dimension, encoding, and input type",
-  "operationId": "GetEmbeddings",
-  "parameters": {
-    "api-version": "2025-05-01",
-    "extra-parameters": "error",
-    "body": {
-      "input": [
-        "This is a very good text"
-      ],
-      "dimensions": 1024,
-      "encoding_format": "float",
-      "input_type": "text",
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetEmbeddings_MinimumSet_Gen.json b/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetEmbeddings_MinimumSet_Gen.json
deleted file mode 100644
index 59cfd528c2a1..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetEmbeddings_MinimumSet_Gen.json
+++ /dev/null
@@ -1,45 +0,0 @@
-{
-  "title": "Create text embeddings",
-  "operationId": "GetEmbeddings",
-  "parameters": {
-    "api-version": "2025-05-01",
-    "body": {
-      "input": [
-        "This is a very good text"
-      ]
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetImageEmbeddings_MaximumSet_Gen.json b/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetImageEmbeddings_MaximumSet_Gen.json
deleted file mode 100644
index 3df9cb9ed8c4..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetImageEmbeddings_MaximumSet_Gen.json
+++ /dev/null
@@ -1,53 +0,0 @@
-{
-  "title": "Create image embeddings from text and image pair, and encoding format",
-  "operationId": "GetImageEmbeddings",
-  "parameters": {
-    "api-version": "2025-05-01",
-    "extra-parameters": "error",
-    "body": {
-      "input": [
-        {
-          "image": "puqkvvlvgcjyzughesnkena",
-          "text": "azrzyjsmnuefqpowpvfmyobeehqsni"
-        }
-      ],
-      "dimensions": 1024,
-      "encoding_format": "float",
-      "input_type": "text",
-      "model": "my-model-name"
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetImageEmbeddings_MinimumSet_Gen.json b/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetImageEmbeddings_MinimumSet_Gen.json
deleted file mode 100644
index 7c686097d492..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetImageEmbeddings_MinimumSet_Gen.json
+++ /dev/null
@@ -1,47 +0,0 @@
-{
-  "title": "Generate image embeddings",
-  "operationId": "GetImageEmbeddings",
-  "parameters": {
-    "api-version": "2025-05-01",
-    "body": {
-      "input": [
-        {
-          "image": "gvmojtfooxixxzayrditjlyymg"
-        }
-      ]
-    }
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "id": "cknxthfa",
-        "data": [
-          {
-            "index": 0,
-            "object": "embedding",
-            "embedding": [
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0,
-              0
-            ]
-          }
-        ],
-        "object": "list",
-        "model": "my-model-name",
-        "usage": {
-          "prompt_tokens": 15,
-          "total_tokens": 15
-        }
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetModelInfo_MaximumSet_Gen.json b/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetModelInfo_MaximumSet_Gen.json
deleted file mode 100644
index f6012f69291a..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetModelInfo_MaximumSet_Gen.json
+++ /dev/null
@@ -1,17 +0,0 @@
-{
-  "title": "Get model information for a model deployment",
-  "operationId": "GetModelInfo",
-  "parameters": {
-    "api-version": "2025-05-01",
-    "model": "Phi-3.5-mini-instruct"
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "model_name": "Phi-3.5-mini-instruct",
-        "model_type": "chat-completion",
-        "model_provider_name": "Microsoft"
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetModelInfo_MinimumSet_Gen.json b/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetModelInfo_MinimumSet_Gen.json
deleted file mode 100644
index 628e6539a92d..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-05-01/examples/GetModelInfo_MinimumSet_Gen.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{
-  "title": "Get model information",
-  "operationId": "GetModelInfo",
-  "parameters": {
-    "api-version": "2025-05-01"
-  },
-  "responses": {
-    "200": {
-      "body": {
-        "model_name": "Phi-3.5-mini-instruct",
-        "model_type": "chat-completion",
-        "model_provider_name": "Microsoft"
-      }
-    }
-  }
-}
diff --git a/specification/ai/data-plane/ModelInference/stable/2025-05-01/openapi.json b/specification/ai/data-plane/ModelInference/stable/2025-05-01/openapi.json
deleted file mode 100644
index 4f43d95369a4..000000000000
--- a/specification/ai/data-plane/ModelInference/stable/2025-05-01/openapi.json
+++ /dev/null
@@ -1,1953 +0,0 @@
-{
-  "swagger": "2.0",
-  "info": {
-    "title": "AI Model Inference",
-    "version": "2025-05-01",
-    "x-typespec-generated": [
-      {
-        "emitter": "@azure-tools/typespec-autorest"
-      }
-    ]
-  },
-  "schemes": [
-    "https"
-  ],
-  "x-ms-parameterized-host": {
-    "hostTemplate": "https://{resource}.services.ai.azure.com/api/models",
-    "useSchemePrefix": false,
-    "parameters": [
-      {
-        "name": "resource",
-        "in": "path",
-        "description": "The Azure AI Services resource name, for example 'my-resource'",
-        "required": true,
-        "type": "string"
-      }
-    ]
-  },
-  "produces": [
-    "application/json"
-  ],
-  "consumes": [
-    "application/json"
-  ],
-  "security": [
-    {
-      "ApiKeyAuth": []
-    },
-    {
-      "OAuth2Auth": [
-        "https://cognitiveservices.azure.com/.default"
-      ]
-    }
-  ],
-  "securityDefinitions": {
-    "ApiKeyAuth": {
-      "type": "apiKey",
-      "name": "api-key",
-      "in": "header"
-    },
-    "OAuth2Auth": {
-      "type": "oauth2",
-      "flow": "implicit",
-      "authorizationUrl": "https://login.microsoftonline.com/common/oauth2/v2.0/authorize",
-      "scopes": {
-        "https://cognitiveservices.azure.com/.default": ""
-      }
-    }
-  },
-  "tags": [],
-  "paths": {
-    "/chat/completions": {
-      "post": {
-        "operationId": "GetChatCompletions",
-        "description": "Gets chat completions for the provided chat messages.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data. The method makes a REST API call to the `/chat/completions` route\non the given endpoint.",
-        "parameters": [
-          {
-            "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter"
-          },
-          {
-            "name": "extra-parameters",
-            "in": "header",
-            "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.",
-            "required": false,
-            "type": "string",
-            "enum": [
-              "error",
-              "drop",
-              "pass-through"
-            ],
-            "x-ms-enum": {
-              "name": "ExtraParameters",
-              "modelAsString": true,
-              "values": [
-                {
-                  "name": "error",
-                  "value": "error",
-                  "description": "The service will error if it detected extra parameters in the request payload. This is the service default."
-                },
-                {
-                  "name": "drop",
-                  "value": "drop",
-                  "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model."
-                },
-                {
-                  "name": "pass_through",
-                  "value": "pass-through",
-                  "description": "The service will pass extra parameters to the back-end AI model."
-                }
-              ]
-            },
-            "x-ms-client-name": "extra_params"
-          },
-          {
-            "name": "body",
-            "in": "body",
-            "description": "The parameters of the chat completions request.",
-            "required": true,
-            "schema": {
-              "$ref": "#/definitions/ChatCompletionsOptions"
-            }
-          }
-        ],
-        "responses": {
-          "200": {
-            "description": "The request has succeeded.",
-            "schema": {
-              "$ref": "#/definitions/ChatCompletions"
-            }
-          },
-          "default": {
-            "description": "An unexpected error response.",
-            "schema": {
-              "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse"
-            },
-            "headers": {
-              "x-ms-error-code": {
-                "type": "string",
-                "description": "String error code indicating what went wrong."
-              }
-            }
-          }
-        },
-        "x-ms-examples": {
-          "Chat completion with audio content": {
-            "$ref": "./examples/GetChatCompletions_AudioModality_Gen.json"
-          },
-          "Chat completion with multiple parameters and chat history": {
-            "$ref": "./examples/GetChatCompletions_MaximumSet_Gen.json"
-          },
-          "Simple chat completion": {
-            "$ref": "./examples/GetChatCompletions_MinimumSet_Gen.json"
-          }
-        }
-      }
-    },
-    "/embeddings": {
-      "post": {
-        "operationId": "GetEmbeddings",
-        "description": "Return the embedding vectors for given text prompts.\nThe method makes a REST API call to the `/embeddings` route on the given endpoint.",
-        "parameters": [
-          {
-            "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter"
-          },
-          {
-            "name": "extra-parameters",
-            "in": "header",
-            "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.",
-            "required": false,
-            "type": "string",
-            "enum": [
-              "error",
-              "drop",
-              "pass-through"
-            ],
-            "x-ms-enum": {
-              "name": "ExtraParameters",
-              "modelAsString": true,
-              "values": [
-                {
-                  "name": "error",
-                  "value": "error",
-                  "description": "The service will error if it detected extra parameters in the request payload. This is the service default."
-                },
-                {
-                  "name": "drop",
-                  "value": "drop",
-                  "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model."
-                },
-                {
-                  "name": "pass_through",
-                  "value": "pass-through",
-                  "description": "The service will pass extra parameters to the back-end AI model."
-                }
-              ]
-            },
-            "x-ms-client-name": "extra_params"
-          },
-          {
-            "name": "body",
-            "in": "body",
-            "description": "The parameters of the embeddings request.",
-            "required": true,
-            "schema": {
-              "$ref": "#/definitions/EmbeddingsOptions"
-            }
-          }
-        ],
-        "responses": {
-          "200": {
-            "description": "The request has succeeded.",
-            "schema": {
-              "$ref": "#/definitions/EmbeddingsResult"
-            }
-          },
-          "default": {
-            "description": "An unexpected error response.",
-            "schema": {
-              "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse"
-            },
-            "headers": {
-              "x-ms-error-code": {
-                "type": "string",
-                "description": "String error code indicating what went wrong."
-              }
-            }
-          }
-        },
-        "x-ms-examples": {
-          "Create text embeddings": {
-            "$ref": "./examples/GetEmbeddings_MinimumSet_Gen.json"
-          },
-          "Create text embeddings with dimension, encoding, and input type": {
-            "$ref": "./examples/GetEmbeddings_MaximumSet_Gen.json"
-          }
-        }
-      }
-    },
-    "/images/embeddings": {
-      "post": {
-        "operationId": "GetImageEmbeddings",
-        "description": "Return the embedding vectors for given images.\nThe method makes a REST API call to the `/images/embeddings` route on the given endpoint.",
-        "parameters": [
-          {
-            "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter"
-          },
-          {
-            "name": "extra-parameters",
-            "in": "header",
-            "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.",
-            "required": false,
-            "type": "string",
-            "enum": [
-              "error",
-              "drop",
-              "pass-through"
-            ],
-            "x-ms-enum": {
-              "name": "ExtraParameters",
-              "modelAsString": true,
-              "values": [
-                {
-                  "name": "error",
-                  "value": "error",
-                  "description": "The service will error if it detected extra parameters in the request payload. This is the service default."
-                },
-                {
-                  "name": "drop",
-                  "value": "drop",
-                  "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model."
-                },
-                {
-                  "name": "pass_through",
-                  "value": "pass-through",
-                  "description": "The service will pass extra parameters to the back-end AI model."
-                }
-              ]
-            },
-            "x-ms-client-name": "extra_params"
-          },
-          {
-            "name": "body",
-            "in": "body",
-            "description": "The parameters of the image embeddings request.",
-            "required": true,
-            "schema": {
-              "$ref": "#/definitions/ImageEmbeddingsOptions"
-            }
-          }
-        ],
-        "responses": {
-          "200": {
-            "description": "The request has succeeded.",
-            "schema": {
-              "$ref": "#/definitions/EmbeddingsResult"
-            }
-          },
-          "default": {
-            "description": "An unexpected error response.",
-            "schema": {
-              "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse"
-            },
-            "headers": {
-              "x-ms-error-code": {
-                "type": "string",
-                "description": "String error code indicating what went wrong."
-              }
-            }
-          }
-        },
-        "x-ms-examples": {
-          "Create image embeddings from text and image pair, and encoding format": {
-            "$ref": "./examples/GetImageEmbeddings_MaximumSet_Gen.json"
-          },
-          "Generate image embeddings": {
-            "$ref": "./examples/GetImageEmbeddings_MinimumSet_Gen.json"
-          }
-        }
-      }
-    },
-    "/info": {
-      "get": {
-        "operationId": "GetModelInfo",
-        "description": "Returns information about the AI model deployed.\nThe method makes a REST API call to the `/info` route on the given endpoint.\nThis method will only work when using Serverless API, Managed Compute, or Model\ninference endpoint. Azure OpenAI endpoints don't support it.",
-        "parameters": [
-          {
-            "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter"
-          },
-          {
-            "name": "model",
-            "in": "query",
-            "description": "The model deployment name you want information from.",
-            "required": false,
-            "type": "string"
-          }
-        ],
-        "responses": {
-          "200": {
-            "description": "The request has succeeded.",
-            "schema": {
-              "$ref": "#/definitions/ModelInfo"
-            }
-          },
-          "default": {
-            "description": "An unexpected error response.",
-            "schema": {
-              "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse"
-            },
-            "headers": {
-              "x-ms-error-code": {
-                "type": "string",
-                "description": "String error code indicating what went wrong."
-              }
-            }
-          }
-        },
-        "x-ms-examples": {
-          "Get model information": {
-            "$ref": "./examples/GetModelInfo_MinimumSet_Gen.json"
-          },
-          "Get model information for a model deployment": {
-            "$ref": "./examples/GetModelInfo_MaximumSet_Gen.json"
-          }
-        }
-      }
-    }
-  },
-  "definitions": {
-    "AudioContentFormat": {
-      "type": "string",
-      "description": "A representation of the possible audio formats for audio.",
-      "enum": [
-        "wav",
-        "mp3"
-      ],
-      "x-ms-enum": {
-        "name": "AudioContentFormat",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "wav",
-            "value": "wav",
-            "description": "Specifies audio in WAV format."
-          },
-          {
-            "name": "mp3",
-            "value": "mp3",
-            "description": "Specifies audio in MP3 format."
-          }
-        ]
-      }
-    },
-    "Azure.Core.Foundations.Error": {
-      "type": "object",
-      "description": "The error object.",
-      "properties": {
-        "code": {
-          "type": "string",
-          "description": "One of a server-defined set of error codes."
-        },
-        "message": {
-          "type": "string",
-          "description": "A human-readable representation of the error."
-        },
-        "target": {
-          "type": "string",
-          "description": "The target of the error."
-        },
-        "details": {
-          "type": "array",
-          "description": "An array of details about specific errors that led to this reported error.",
-          "items": {
-            "$ref": "#/definitions/Azure.Core.Foundations.Error"
-          },
-          "x-ms-identifiers": []
-        },
-        "innererror": {
-          "$ref": "#/definitions/Azure.Core.Foundations.InnerError",
-          "description": "An object containing more specific information than the current object about the error."
-        }
-      },
-      "required": [
-        "code",
-        "message"
-      ]
-    },
-    "Azure.Core.Foundations.ErrorResponse": {
-      "type": "object",
-      "description": "A response containing error details.",
-      "properties": {
-        "error": {
-          "$ref": "#/definitions/Azure.Core.Foundations.Error",
-          "description": "The error object."
-        }
-      },
-      "required": [
-        "error"
-      ]
-    },
-    "Azure.Core.Foundations.InnerError": {
-      "type": "object",
-      "description": "An object containing more specific information about the error. As per Microsoft One API guidelines - https://github.com/microsoft/api-guidelines/blob/vNext/azure/Guidelines.md#handling-errors.",
-      "properties": {
-        "code": {
-          "type": "string",
-          "description": "One of a server-defined set of error codes."
-        },
-        "innererror": {
-          "$ref": "#/definitions/Azure.Core.Foundations.InnerError",
-          "description": "Inner error."
-        }
-      }
-    },
-    "ChatChoice": {
-      "type": "object",
-      "description": "The representation of a single prompt completion as part of an overall chat completions request.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.",
-      "properties": {
-        "index": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The ordered index associated with this chat completions choice."
-        },
-        "finish_reason": {
-          "$ref": "#/definitions/CompletionsFinishReason",
-          "description": "The reason that this chat completions choice completed its generation.",
-          "x-nullable": true,
-          "readOnly": true
-        },
-        "message": {
-          "$ref": "#/definitions/ChatResponseMessage",
-          "description": "The chat message for a given chat completions prompt.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "index",
-        "finish_reason",
-        "message"
-      ]
-    },
-    "ChatCompletions": {
-      "type": "object",
-      "description": "Representation of the response data from a chat completions request.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "A unique identifier associated with this chat completions response."
-        },
-        "object": {
-          "type": "string",
-          "description": "The response object type, which is always `chat.completion`.",
-          "enum": [
-            "chat.completion"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        },
-        "created": {
-          "type": "integer",
-          "format": "unixtime",
-          "description": "The first timestamp associated with generation activity for this completions response,\nrepresented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.",
-          "readOnly": true
-        },
-        "model": {
-          "type": "string",
-          "description": "The model used for the chat completion.",
-          "readOnly": true
-        },
-        "choices": {
-          "type": "array",
-          "description": "The collection of completions choices associated with this completions response.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.",
-          "minItems": 1,
-          "items": {
-            "$ref": "#/definitions/ChatChoice"
-          },
-          "readOnly": true,
-          "x-ms-identifiers": []
-        },
-        "usage": {
-          "$ref": "#/definitions/CompletionsUsage",
-          "description": "Usage information for tokens processed and generated as part of this completions operation.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "id",
-        "object",
-        "created",
-        "model",
-        "choices",
-        "usage"
-      ]
-    },
-    "ChatCompletionsAudio": {
-      "type": "object",
-      "description": "A representation of the audio generated by the model.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "Unique identifier for the audio response. This value can be used in chat history messages instead of passing\nthe full audio object.",
-          "readOnly": true
-        },
-        "expires_at": {
-          "type": "integer",
-          "format": "unixtime",
-          "description": "The Unix timestamp (in seconds) at which the audio piece expires and can no longer be referenced by its ID in\nmulti-turn conversations.",
-          "readOnly": true,
-          "x-ms-client-name": "expiresAt"
-        },
-        "data": {
-          "type": "string",
-          "description": "Base64 encoded audio data",
-          "readOnly": true
-        },
-        "format": {
-          "$ref": "#/definitions/AudioContentFormat",
-          "description": "The format of the audio content. If format is not provided, it will match the format used in the\ninput audio request.",
-          "readOnly": true
-        },
-        "transcript": {
-          "type": "string",
-          "description": "The transcript of the audio file.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "id",
-        "expires_at",
-        "data",
-        "transcript"
-      ]
-    },
-    "ChatCompletionsModality": {
-      "type": "string",
-      "description": "The modalities that the model is allowed to use for the chat completions response.",
-      "enum": [
-        "text",
-        "audio"
-      ],
-      "x-ms-enum": {
-        "name": "ChatCompletionsModality",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "text",
-            "value": "text",
-            "description": "The model is only allowed to generate text."
-          },
-          {
-            "name": "audio",
-            "value": "audio",
-            "description": "The model is allowed to generate audio."
-          }
-        ]
-      }
-    },
-    "ChatCompletionsNamedToolChoice": {
-      "type": "object",
-      "description": "A tool selection of a specific, named function tool that will limit chat completions to using the named function.",
-      "properties": {
-        "type": {
-          "type": "string",
-          "description": "The type of the tool. Currently, only `function` is supported.",
-          "enum": [
-            "function"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        },
-        "function": {
-          "$ref": "#/definitions/ChatCompletionsNamedToolChoiceFunction",
-          "description": "The function that should be called."
-        }
-      },
-      "required": [
-        "type",
-        "function"
-      ]
-    },
-    "ChatCompletionsNamedToolChoiceFunction": {
-      "type": "object",
-      "description": "A tool selection of a specific, named function tool that will limit chat completions to using the named function.",
-      "properties": {
-        "name": {
-          "type": "string",
-          "description": "The name of the function that should be called."
-        }
-      },
-      "required": [
-        "name"
-      ]
-    },
-    "ChatCompletionsOptions": {
-      "type": "object",
-      "description": "The configuration information for a chat completions request.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.",
-      "properties": {
-        "messages": {
-          "type": "array",
-          "description": "The collection of context messages associated with this chat completions request.\nTypical usage begins with a chat message for the System role that provides instructions for\nthe behavior of the assistant, followed by alternating messages between the User and\nAssistant roles.",
-          "minItems": 1,
-          "items": {
-            "$ref": "#/definitions/ChatRequestMessage"
-          },
-          "x-ms-identifiers": []
-        },
-        "frequency_penalty": {
-          "type": "number",
-          "format": "float",
-          "description": "A value that influences the probability of generated tokens appearing based on their cumulative\nfrequency in generated text.\nPositive values will make tokens less likely to appear as their frequency increases and\ndecrease the likelihood of the model repeating the same statements verbatim.\nSupported range is [-2, 2].",
-          "default": 0,
-          "minimum": -2,
-          "maximum": 2
-        },
-        "stream": {
-          "type": "boolean",
-          "description": "A value indicating whether chat completions should be streamed for this request."
-        },
-        "presence_penalty": {
-          "type": "number",
-          "format": "float",
-          "description": "A value that influences the probability of generated tokens appearing based on their existing\npresence in generated text.\nPositive values will make tokens less likely to appear when they already exist and increase the\nmodel's likelihood to output new topics.\nSupported range is [-2, 2].",
-          "default": 0,
-          "minimum": -2,
-          "maximum": 2
-        },
-        "temperature": {
-          "type": "number",
-          "format": "float",
-          "description": "The sampling temperature to use that controls the apparent creativity of generated completions.\nHigher values will make output more random while lower values will make results more focused\nand deterministic.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].",
-          "default": 0.7,
-          "minimum": 0,
-          "maximum": 1
-        },
-        "top_p": {
-          "type": "number",
-          "format": "float",
-          "description": "An alternative to sampling with temperature called nucleus sampling. This value causes the\nmodel to consider the results of tokens with the provided probability mass. As an example, a\nvalue of 0.15 will cause only the tokens comprising the top 15% of probability mass to be\nconsidered.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].",
-          "default": 1,
-          "minimum": 0,
-          "maximum": 1
-        },
-        "max_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The maximum number of tokens to generate.",
-          "minimum": 0
-        },
-        "response_format": {
-          "$ref": "#/definitions/ChatCompletionsResponseFormat",
-          "description": "An object specifying the format that the model must output.\n\nSetting to `{ \"type\": \"json_schema\", \"json_schema\": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.\n\nSetting to `{ \"type\": \"json_object\" }` enables JSON mode, which ensures the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly \"stuck\" request. Also note that the message content may be partially cut off if `finish_reason=\"length\"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length."
-        },
-        "stop": {
-          "type": "array",
-          "description": "A collection of textual sequences that will end completions generation.",
-          "minItems": 1,
-          "items": {
-            "type": "string"
-          }
-        },
-        "tools": {
-          "type": "array",
-          "description": "A list of tools the model may request to call. Currently, only functions are supported as a tool. The model\nmay respond with a function call request and provide the input arguments in JSON format for that function.",
-          "minItems": 1,
-          "items": {
-            "$ref": "#/definitions/ChatCompletionsToolDefinition"
-          },
-          "x-ms-identifiers": []
-        },
-        "tool_choice": {
-          "description": "If specified, the model will configure which of the provided tools it can use for the chat completions response.",
-          "x-ms-client-name": "toolChoice"
-        },
-        "seed": {
-          "type": "integer",
-          "format": "int64",
-          "description": "If specified, the system will make a best effort to sample deterministically such that repeated requests with the\nsame seed and parameters should return the same result. Determinism is not guaranteed."
-        },
-        "model": {
-          "type": "string",
-          "description": "ID of the specific AI model to use, if more than one model is available on the endpoint."
-        },
-        "modalities": {
-          "type": "array",
-          "description": "The modalities that the model is allowed to use for the chat completions response. The default modality\nis `text`. Indicating an unsupported modality combination results in a 422 error.",
-          "items": {
-            "$ref": "#/definitions/ChatCompletionsModality"
-          }
-        },
-        "user_security_context": {
-          "$ref": "#/definitions/UserSecurityContext",
-          "description": "User security context contains several parameters that describe the AI application itself, and the end user that interacts with the AI application. \nThese fields assist your security operations teams to investigate and mitigate security incidents by providing a comprehensive approach to protecting your AI applications. \n[Learn more](https://aka.ms/TP4AI/Documentation/EndUserContext) about protecting AI applications using Microsoft Defender for Cloud."
-        }
-      },
-      "required": [
-        "messages"
-      ],
-      "additionalProperties": {}
-    },
-    "ChatCompletionsResponseFormat": {
-      "type": "object",
-      "description": "Represents the format that the model must output. Use this to enable JSON mode instead of the default text mode.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message.",
-      "properties": {
-        "type": {
-          "type": "string",
-          "description": "The response format type to use for chat completions."
-        }
-      },
-      "discriminator": "type",
-      "required": [
-        "type"
-      ]
-    },
-    "ChatCompletionsResponseFormatJsonObject": {
-      "type": "object",
-      "description": "A response format for Chat Completions that restricts responses to emitting valid JSON objects.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message.",
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatCompletionsResponseFormat"
-        }
-      ],
-      "x-ms-discriminator-value": "json_object"
-    },
-    "ChatCompletionsResponseFormatJsonSchema": {
-      "type": "object",
-      "description": "A response format for Chat Completions that restricts responses to emitting valid JSON objects, with a\nJSON schema specified by the caller.",
-      "properties": {
-        "json_schema": {
-          "$ref": "#/definitions/ChatCompletionsResponseFormatJsonSchemaDefinition",
-          "description": "The definition of the required JSON schema in the response, and associated metadata."
-        }
-      },
-      "required": [
-        "json_schema"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatCompletionsResponseFormat"
-        }
-      ],
-      "x-ms-discriminator-value": "json_schema"
-    },
-    "ChatCompletionsResponseFormatJsonSchemaDefinition": {
-      "type": "object",
-      "description": "The definition of the required JSON schema in the response, and associated metadata.",
-      "properties": {
-        "name": {
-          "type": "string",
-          "description": "The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64."
-        },
-        "schema": {
-          "type": "object",
-          "description": "The definition of the JSON schema",
-          "additionalProperties": {}
-        },
-        "description": {
-          "type": "string",
-          "description": "A description of the response format, used by the AI model to determine how to generate responses in this format."
-        },
-        "strict": {
-          "type": "boolean",
-          "description": "Whether to enable strict schema adherence when generating the output.\nIf set to true, the model will always follow the exact schema defined in the `schema` field. Only a subset of\nJSON Schema is supported when `strict` is `true`.",
-          "default": false
-        }
-      },
-      "required": [
-        "name",
-        "schema"
-      ]
-    },
-    "ChatCompletionsResponseFormatText": {
-      "type": "object",
-      "description": "A response format for Chat Completions that emits text responses. This is the default response format.",
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatCompletionsResponseFormat"
-        }
-      ],
-      "x-ms-discriminator-value": "text"
-    },
-    "ChatCompletionsToolCall": {
-      "type": "object",
-      "description": "A function tool call requested by the AI model.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "The ID of the tool call."
-        },
-        "type": {
-          "type": "string",
-          "description": "The type of tool call. Currently, only `function` is supported.",
-          "enum": [
-            "function"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        },
-        "function": {
-          "$ref": "#/definitions/FunctionCall",
-          "description": "The details of the function call requested by the AI model."
-        }
-      },
-      "required": [
-        "id",
-        "type",
-        "function"
-      ]
-    },
-    "ChatCompletionsToolChoicePreset": {
-      "type": "string",
-      "description": "Represents a generic policy for how a chat completions tool may be selected.",
-      "enum": [
-        "auto",
-        "none",
-        "required"
-      ],
-      "x-ms-enum": {
-        "name": "ChatCompletionsToolChoicePreset",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "auto",
-            "value": "auto",
-            "description": "Specifies that the model may either use any of the tools provided in this chat completions request or\ninstead return a standard chat completions response as if no tools were provided."
-          },
-          {
-            "name": "none",
-            "value": "none",
-            "description": "Specifies that the model should not respond with a tool call and should instead provide a standard chat\ncompletions response. Response content may still be influenced by the provided tool definitions."
-          },
-          {
-            "name": "required",
-            "value": "required",
-            "description": "Specifies that the model should respond with a call to one or more tools."
-          }
-        ]
-      }
-    },
-    "ChatCompletionsToolDefinition": {
-      "type": "object",
-      "description": "The definition of a chat completions tool that can call a function.",
-      "properties": {
-        "type": {
-          "type": "string",
-          "description": "The type of the tool. Currently, only `function` is supported.",
-          "enum": [
-            "function"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        },
-        "function": {
-          "$ref": "#/definitions/FunctionDefinition",
-          "description": "The function definition details for the function tool."
-        }
-      },
-      "required": [
-        "type",
-        "function"
-      ]
-    },
-    "ChatMessageAudioContentItem": {
-      "type": "object",
-      "description": "A structured chat content item containing an audio reference.",
-      "properties": {
-        "audio_url": {
-          "$ref": "#/definitions/ChatMessageAudioUrl",
-          "description": "An internet location, which must be accessible to the model, from which the audio may be retrieved.",
-          "x-ms-client-name": "audioUrl"
-        }
-      },
-      "required": [
-        "audio_url"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatMessageContentItem"
-        }
-      ],
-      "x-ms-discriminator-value": "audio_url"
-    },
-    "ChatMessageAudioUrl": {
-      "type": "object",
-      "description": "An internet location from which the model may retrieve audio.",
-      "properties": {
-        "url": {
-          "type": "string",
-          "description": "The URL of the audio."
-        }
-      },
-      "required": [
-        "url"
-      ]
-    },
-    "ChatMessageContentItem": {
-      "type": "object",
-      "description": "An abstract representation of a structured content item within a chat message.",
-      "properties": {
-        "type": {
-          "type": "string",
-          "description": "The discriminated object type."
-        }
-      },
-      "discriminator": "type",
-      "required": [
-        "type"
-      ]
-    },
-    "ChatMessageImageContentItem": {
-      "type": "object",
-      "description": "A structured chat content item containing an image reference.",
-      "properties": {
-        "image_url": {
-          "$ref": "#/definitions/ChatMessageImageUrl",
-          "description": "An internet location, which must be accessible to the model, from which the image may be retrieved.",
-          "x-ms-client-name": "imageUrl"
-        }
-      },
-      "required": [
-        "image_url"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatMessageContentItem"
-        }
-      ],
-      "x-ms-discriminator-value": "image_url"
-    },
-    "ChatMessageImageDetailLevel": {
-      "type": "string",
-      "description": "A representation of the possible image detail levels for image-based chat completions message content.",
-      "enum": [
-        "auto",
-        "low",
-        "high"
-      ],
-      "x-ms-enum": {
-        "name": "ChatMessageImageDetailLevel",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "auto",
-            "value": "auto",
-            "description": "Specifies that the model should determine which detail level to apply using heuristics like image size."
-          },
-          {
-            "name": "low",
-            "value": "low",
-            "description": "Specifies that image evaluation should be constrained to the 'low-res' model that may be faster and consume fewer\ntokens but may also be less accurate for highly detailed images."
-          },
-          {
-            "name": "high",
-            "value": "high",
-            "description": "Specifies that image evaluation should enable the 'high-res' model that may be more accurate for highly detailed\nimages but may also be slower and consume more tokens."
-          }
-        ]
-      }
-    },
-    "ChatMessageImageUrl": {
-      "type": "object",
-      "description": "An internet location from which the model may retrieve an image.",
-      "properties": {
-        "url": {
-          "type": "string",
-          "description": "The URL of the image."
-        },
-        "detail": {
-          "$ref": "#/definitions/ChatMessageImageDetailLevel",
-          "description": "The evaluation quality setting to use, which controls relative prioritization of speed, token consumption, and\naccuracy."
-        }
-      },
-      "required": [
-        "url"
-      ]
-    },
-    "ChatMessageInputAudio": {
-      "type": "object",
-      "description": "The details of an audio chat message content part.",
-      "properties": {
-        "data": {
-          "type": "string",
-          "description": "Base64 encoded audio data"
-        },
-        "format": {
-          "$ref": "#/definitions/AudioContentFormat",
-          "description": "The audio format of the audio content."
-        }
-      },
-      "required": [
-        "data",
-        "format"
-      ]
-    },
-    "ChatMessageInputAudioContentItem": {
-      "type": "object",
-      "description": "A structured chat content item containing audio content.",
-      "properties": {
-        "format": {
-          "$ref": "#/definitions/AudioContentFormat",
-          "description": "The audio format of the audio reference."
-        }
-      },
-      "required": [
-        "format"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatMessageContentItem"
-        }
-      ],
-      "x-ms-discriminator-value": "input_audio"
-    },
-    "ChatMessageTextContentItem": {
-      "type": "object",
-      "description": "A structured chat content item containing plain text.",
-      "properties": {
-        "text": {
-          "type": "string",
-          "description": "The content of the message."
-        }
-      },
-      "required": [
-        "text"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatMessageContentItem"
-        }
-      ],
-      "x-ms-discriminator-value": "text"
-    },
-    "ChatRequestAssistantMessage": {
-      "type": "object",
-      "description": "A request chat message representing response or action from the assistant.",
-      "properties": {
-        "content": {
-          "type": "string",
-          "description": "The content of the message."
-        },
-        "tool_calls": {
-          "type": "array",
-          "description": "The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat\ncompletions request to resolve as configured.",
-          "items": {
-            "$ref": "#/definitions/ChatCompletionsToolCall"
-          },
-          "x-ms-client-name": "toolCalls"
-        },
-        "audio": {
-          "$ref": "#/definitions/ChatRequestAudioReference",
-          "description": "The audio generated by a previous response in a multi-turn conversation."
-        }
-      },
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatRequestMessage"
-        }
-      ],
-      "x-ms-discriminator-value": "assistant"
-    },
-    "ChatRequestAudioReference": {
-      "type": "object",
-      "description": "A reference to an audio response generated by the model.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "Unique identifier for the audio response. This value corresponds to the id of a previous audio completion."
-        }
-      },
-      "required": [
-        "id"
-      ]
-    },
-    "ChatRequestMessage": {
-      "type": "object",
-      "description": "An abstract representation of a chat message as provided in a request.",
-      "properties": {
-        "role": {
-          "$ref": "#/definitions/ChatRole",
-          "description": "The chat role associated with this message."
-        }
-      },
-      "discriminator": "role",
-      "required": [
-        "role"
-      ]
-    },
-    "ChatRequestSystemMessage": {
-      "type": "object",
-      "description": "A request chat message containing system instructions that influence how the model will generate a chat completions\nresponse.",
-      "properties": {
-        "content": {
-          "type": "string",
-          "description": "The contents of the system message."
-        }
-      },
-      "required": [
-        "content"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatRequestMessage"
-        }
-      ],
-      "x-ms-discriminator-value": "system"
-    },
-    "ChatRequestToolMessage": {
-      "type": "object",
-      "description": "A request chat message representing requested output from a configured tool.",
-      "properties": {
-        "content": {
-          "type": "string",
-          "description": "The content of the message."
-        },
-        "tool_call_id": {
-          "type": "string",
-          "description": "The ID of the tool call resolved by the provided content.",
-          "x-ms-client-name": "toolCallId"
-        }
-      },
-      "required": [
-        "tool_call_id"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatRequestMessage"
-        }
-      ],
-      "x-ms-discriminator-value": "tool"
-    },
-    "ChatRequestUserMessage": {
-      "type": "object",
-      "description": "A request chat message representing user input to the assistant.",
-      "properties": {
-        "content": {
-          "description": "The contents of the user message, with available input types varying by selected model."
-        }
-      },
-      "required": [
-        "content"
-      ],
-      "allOf": [
-        {
-          "$ref": "#/definitions/ChatRequestMessage"
-        }
-      ],
-      "x-ms-discriminator-value": "user"
-    },
-    "ChatResponseMessage": {
-      "type": "object",
-      "description": "A representation of a chat message as received in a response.",
-      "properties": {
-        "role": {
-          "$ref": "#/definitions/ChatRole",
-          "description": "The chat role associated with the message.",
-          "readOnly": true
-        },
-        "content": {
-          "type": "string",
-          "description": "The content of the message.",
-          "x-nullable": true,
-          "readOnly": true
-        },
-        "reasoning_content": {
-          "type": "string",
-          "description": "The reasoning content the model used for generating the response",
-          "readOnly": true
-        },
-        "tool_calls": {
-          "type": "array",
-          "description": "The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat\ncompletions request to resolve as configured.",
-          "items": {
-            "$ref": "#/definitions/ChatCompletionsToolCall"
-          },
-          "readOnly": true,
-          "x-ms-client-name": "toolCalls"
-        },
-        "audio": {
-          "$ref": "#/definitions/ChatCompletionsAudio",
-          "description": "The audio generated by the model as a response to the messages if the model is configured to generate audio.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "role",
-        "content"
-      ]
-    },
-    "ChatRole": {
-      "type": "string",
-      "description": "A description of the intended purpose of a message within a chat completions interaction.",
-      "enum": [
-        "system",
-        "developer",
-        "user",
-        "assistant",
-        "tool"
-      ],
-      "x-ms-enum": {
-        "name": "ChatRole",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "system",
-            "value": "system",
-            "description": "The role that instructs or sets the behavior of the assistant."
-          },
-          {
-            "name": "developer",
-            "value": "developer",
-            "description": "The role that provides instructions to the model prioritized ahead of user messages."
-          },
-          {
-            "name": "user",
-            "value": "user",
-            "description": "The role that provides input for chat completions."
-          },
-          {
-            "name": "assistant",
-            "value": "assistant",
-            "description": "The role that provides responses to system-instructed, user-prompted input."
-          },
-          {
-            "name": "tool",
-            "value": "tool",
-            "description": "The role that represents extension tool activity within a chat completions operation."
-          }
-        ]
-      }
-    },
-    "CompletionsFinishReason": {
-      "type": "string",
-      "description": "Representation of the manner in which a completions response concluded.",
-      "enum": [
-        "stop",
-        "length",
-        "content_filter",
-        "tool_calls"
-      ],
-      "x-ms-enum": {
-        "name": "CompletionsFinishReason",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "stopped",
-            "value": "stop",
-            "description": "Completions ended normally and reached its end of token generation."
-          },
-          {
-            "name": "tokenLimitReached",
-            "value": "length",
-            "description": "Completions exhausted available token limits before generation could complete."
-          },
-          {
-            "name": "contentFiltered",
-            "value": "content_filter",
-            "description": "Completions generated a response that was identified as potentially sensitive per content\nmoderation policies."
-          },
-          {
-            "name": "toolCalls",
-            "value": "tool_calls",
-            "description": "Completion ended with the model calling a provided tool for output."
-          }
-        ]
-      }
-    },
-    "CompletionsUsage": {
-      "type": "object",
-      "description": "Representation of the token counts processed for a completions request.\nCounts consider all tokens across prompts, choices, choice alternates, best_of generations, and\nother consumers.",
-      "properties": {
-        "completion_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The number of tokens generated across all completions emissions.",
-          "readOnly": true
-        },
-        "prompt_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The number of tokens in the provided prompts for the completions request.",
-          "readOnly": true
-        },
-        "total_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The total number of tokens processed for the completions request and response.",
-          "readOnly": true
-        },
-        "completion_tokens_details": {
-          "$ref": "#/definitions/CompletionsUsageDetails",
-          "description": "Breakdown of tokens used in a completion.",
-          "readOnly": true
-        },
-        "prompt_tokens_details": {
-          "$ref": "#/definitions/PromptUsageDetails",
-          "description": "Breakdown of tokens used in the prompt/chat history.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "completion_tokens",
-        "prompt_tokens",
-        "total_tokens"
-      ]
-    },
-    "CompletionsUsageDetails": {
-      "type": "object",
-      "description": "A breakdown of tokens used in a completion.",
-      "properties": {
-        "audio_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The number of tokens corresponding to audio input.",
-          "readOnly": true
-        },
-        "reasoning_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The number of tokens corresponding to reasoning.",
-          "readOnly": true
-        },
-        "total_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The total number of tokens processed for the completions request and response.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "audio_tokens",
-        "reasoning_tokens",
-        "total_tokens"
-      ]
-    },
-    "EmbeddingEncodingFormat": {
-      "type": "string",
-      "description": "Specifies the types of embeddings to generate. Compressed embeddings types like `uint8`, `int8`, `ubinary` and \n`binary`, may reduce storage costs without sacrificing the integrity of the data. Returns a 422 error if the\nmodel doesn't support the value or parameter. Read the model's documentation to know the values supported by\nyour model.",
-      "enum": [
-        "base64",
-        "binary",
-        "float",
-        "int8",
-        "ubinary",
-        "uint8"
-      ],
-      "x-ms-enum": {
-        "name": "EmbeddingEncodingFormat",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "base64",
-            "value": "base64",
-            "description": "Get back binary representation of the embeddings encoded as Base64 string. OpenAI Python library retrieves \nembeddings from the API as encoded binary data, rather than using intermediate decimal representations as is \nusually done."
-          },
-          {
-            "name": "binary",
-            "value": "binary",
-            "description": "Get back signed binary embeddings"
-          },
-          {
-            "name": "float",
-            "value": "float",
-            "description": "Get back full precision embeddings"
-          },
-          {
-            "name": "int8",
-            "value": "int8",
-            "description": "Get back signed int8 embeddings"
-          },
-          {
-            "name": "ubinary",
-            "value": "ubinary",
-            "description": "Get back unsigned binary embeddings"
-          },
-          {
-            "name": "uint8",
-            "value": "uint8",
-            "description": "Get back unsigned int8 embeddings"
-          }
-        ]
-      }
-    },
-    "EmbeddingInputType": {
-      "type": "string",
-      "description": "Represents the input types used for embedding search.",
-      "enum": [
-        "text",
-        "query",
-        "document"
-      ],
-      "x-ms-enum": {
-        "name": "EmbeddingInputType",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "text",
-            "value": "text",
-            "description": "Indicates the input is a general text input."
-          },
-          {
-            "name": "query",
-            "value": "query",
-            "description": "Indicates the input represents a search query to find the most relevant documents in your vector database."
-          },
-          {
-            "name": "document",
-            "value": "document",
-            "description": "Indicates the input represents a document that is stored in a vector database."
-          }
-        ]
-      }
-    },
-    "EmbeddingItem": {
-      "type": "object",
-      "description": "Representation of a single embeddings relatedness comparison.",
-      "properties": {
-        "embedding": {
-          "type": "array",
-          "description": "List of embedding values for the input prompt. These represent a measurement of the\nvector-based relatedness of the provided input. This may alternatively be a base64 encoded string of the embedding vector.",
-          "items": {
-            "type": "number",
-            "format": "float"
-          },
-          "readOnly": true
-        },
-        "index": {
-          "type": "integer",
-          "format": "int32",
-          "description": "Index of the prompt to which the EmbeddingItem corresponds.",
-          "readOnly": true
-        },
-        "object": {
-          "type": "string",
-          "description": "The object type of this embeddings item. Will always be `embedding`.",
-          "enum": [
-            "embedding"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        }
-      },
-      "required": [
-        "embedding",
-        "index",
-        "object"
-      ]
-    },
-    "EmbeddingsOptions": {
-      "type": "object",
-      "description": "The configuration information for an embeddings request.",
-      "properties": {
-        "input": {
-          "type": "array",
-          "description": "Input text to embed, encoded as a string or array of tokens.\nTo embed multiple inputs in a single request, pass an array\nof strings or array of token arrays.",
-          "items": {
-            "type": "string"
-          }
-        },
-        "dimensions": {
-          "type": "integer",
-          "format": "int32",
-          "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter."
-        },
-        "encoding_format": {
-          "$ref": "#/definitions/EmbeddingEncodingFormat",
-          "description": "Optional. The desired format for the returned embeddings."
-        },
-        "input_type": {
-          "$ref": "#/definitions/EmbeddingInputType",
-          "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter."
-        },
-        "model": {
-          "type": "string",
-          "description": "ID of the specific AI model to use, if more than one model is available on the endpoint."
-        }
-      },
-      "required": [
-        "input"
-      ],
-      "additionalProperties": {}
-    },
-    "EmbeddingsResult": {
-      "type": "object",
-      "description": "Representation of the response data from an embeddings request.\nEmbeddings measure the relatedness of text strings and are commonly used for search, clustering,\nrecommendations, and other similar scenarios.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "Unique identifier for the embeddings result.",
-          "readOnly": true
-        },
-        "data": {
-          "type": "array",
-          "description": "Embedding values for the prompts submitted in the request.",
-          "items": {
-            "$ref": "#/definitions/EmbeddingItem"
-          },
-          "readOnly": true,
-          "x-ms-identifiers": []
-        },
-        "usage": {
-          "$ref": "#/definitions/EmbeddingsUsage",
-          "description": "Usage counts for tokens input using the embeddings API.",
-          "readOnly": true
-        },
-        "object": {
-          "type": "string",
-          "description": "The object type of the embeddings result. Will always be `list`.",
-          "enum": [
-            "list"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        },
-        "model": {
-          "type": "string",
-          "description": "The model ID used to generate this result.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "id",
-        "data",
-        "usage",
-        "object",
-        "model"
-      ]
-    },
-    "EmbeddingsUsage": {
-      "type": "object",
-      "description": "Measurement of the amount of tokens used in this request and response.",
-      "properties": {
-        "prompt_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "Number of tokens in the request.",
-          "readOnly": true
-        },
-        "total_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "Total number of tokens transacted in this request/response. Should equal the\nnumber of tokens in the request.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "prompt_tokens",
-        "total_tokens"
-      ]
-    },
-    "ExtraParameters": {
-      "type": "string",
-      "description": "Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload.",
-      "enum": [
-        "error",
-        "drop",
-        "pass-through"
-      ],
-      "x-ms-enum": {
-        "name": "ExtraParameters",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "error",
-            "value": "error",
-            "description": "The service will error if it detects extra parameters in the request payload. This is the service default."
-          },
-          {
-            "name": "drop",
-            "value": "drop",
-            "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model."
-          },
-          {
-            "name": "pass_through",
-            "value": "pass-through",
-            "description": "The service will pass extra parameters to the back-end AI model."
-          }
-        ]
-      }
-    },
-    "FunctionCall": {
-      "type": "object",
-      "description": "The name and arguments of a function that should be called, as generated by the model.",
-      "properties": {
-        "name": {
-          "type": "string",
-          "description": "The name of the function to call.",
-          "readOnly": true
-        },
-        "arguments": {
-          "type": "string",
-          "description": "The arguments to call the function with, as generated by the model in JSON format.\nNote that the model does not always generate valid JSON, and may hallucinate parameters\nnot defined by your function schema. Validate the arguments in your code before calling\nyour function.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "name",
-        "arguments"
-      ]
-    },
-    "FunctionDefinition": {
-      "type": "object",
-      "description": "The definition of a caller-specified function that chat completions may invoke in response to matching user input.",
-      "properties": {
-        "name": {
-          "type": "string",
-          "description": "The name of the function to be called."
-        },
-        "description": {
-          "type": "string",
-          "description": "A description of what the function does. The model will use this description when selecting the function and\ninterpreting its parameters."
-        },
-        "parameters": {
-          "type": "object",
-          "description": "The parameters the function accepts, described as a JSON Schema object.",
-          "additionalProperties": {}
-        }
-      },
-      "required": [
-        "name"
-      ]
-    },
-    "ImageEmbeddingInput": {
-      "type": "object",
-      "description": "Represents an image with optional text.",
-      "properties": {
-        "image": {
-          "type": "string",
-          "description": "The input image encoded in base64 string as a data URL. Example: `data:image/{format};base64,{data}`."
-        },
-        "text": {
-          "type": "string",
-          "description": "Optional. The text input to feed into the model (like DINO, CLIP).\nReturns a 422 error if the model doesn't support the value or parameter."
-        }
-      },
-      "required": [
-        "image"
-      ]
-    },
-    "ImageEmbeddingsOptions": {
-      "type": "object",
-      "description": "The configuration information for an image embeddings request.",
-      "properties": {
-        "input": {
-          "type": "array",
-          "description": "Input image to embed. To embed multiple inputs in a single request, pass an array.\nThe input must not exceed the max input tokens for the model.",
-          "items": {
-            "$ref": "#/definitions/ImageEmbeddingInput"
-          },
-          "x-ms-identifiers": []
-        },
-        "dimensions": {
-          "type": "integer",
-          "format": "int32",
-          "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter."
-        },
-        "encoding_format": {
-          "$ref": "#/definitions/EmbeddingEncodingFormat",
-          "description": "Optional. The desired format for the returned embeddings.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter."
-        },
-        "input_type": {
-          "$ref": "#/definitions/EmbeddingInputType",
-          "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter."
-        },
-        "model": {
-          "type": "string",
-          "description": "ID of the specific AI model to use, if more than one model is available on the endpoint."
-        }
-      },
-      "required": [
-        "input"
-      ],
-      "additionalProperties": {}
-    },
-    "ModelInfo": {
-      "type": "object",
-      "description": "Represents some basic information about the AI model.",
-      "properties": {
-        "model_name": {
-          "type": "string",
-          "description": "The name of the AI model. For example: `Phi21`",
-          "readOnly": true
-        },
-        "model_type": {
-          "$ref": "#/definitions/ModelType",
-          "description": "The type of the AI model. A unique identifier for the profile.",
-          "readOnly": true
-        },
-        "model_provider_name": {
-          "type": "string",
-          "description": "The model provider name. For example: `Microsoft`",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "model_name",
-        "model_type",
-        "model_provider_name"
-      ]
-    },
-    "ModelType": {
-      "type": "string",
-      "description": "The type of AI model",
-      "enum": [
-        "embeddings",
-        "chat-completion"
-      ],
-      "x-ms-enum": {
-        "name": "ModelType",
-        "modelAsString": true,
-        "values": [
-          {
-            "name": "embeddings",
-            "value": "embeddings",
-            "description": "A model capable of generating embeddings from text"
-          },
-          {
-            "name": "chat_completion",
-            "value": "chat-completion",
-            "description": "A model capable of taking chat-formatted messages and generating responses"
-          }
-        ]
-      }
-    },
-    "PromptUsageDetails": {
-      "type": "object",
-      "description": "A breakdown of tokens used in the prompt/chat history.",
-      "properties": {
-        "audio_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The number of tokens corresponding to audio input.",
-          "readOnly": true
-        },
-        "cached_tokens": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The total number of tokens cached.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "audio_tokens",
-        "cached_tokens"
-      ]
-    },
-    "StreamingChatChoiceUpdate": {
-      "type": "object",
-      "description": "Represents an update to a single prompt completion when the service is streaming updates \nusing Server Sent Events (SSE).\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.",
-      "properties": {
-        "index": {
-          "type": "integer",
-          "format": "int32",
-          "description": "The ordered index associated with this chat completions choice."
-        },
-        "finish_reason": {
-          "$ref": "#/definitions/CompletionsFinishReason",
-          "description": "The reason that this chat completions choice completed its generation.",
-          "x-nullable": true,
-          "readOnly": true
-        },
-        "delta": {
-          "$ref": "#/definitions/StreamingChatResponseMessageUpdate",
-          "description": "An update to the chat message for a given chat completions prompt.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "index",
-        "finish_reason",
-        "delta"
-      ]
-    },
-    "StreamingChatCompletionsUpdate": {
-      "type": "object",
-      "description": "Represents a response update to a chat completions request, when the service is streaming updates \nusing Server Sent Events (SSE).\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "A unique identifier associated with this chat completions response."
-        },
-        "object": {
-          "type": "string",
-          "description": "The response object type, which is always `chat.completion`.",
-          "enum": [
-            "chat.completion"
-          ],
-          "x-ms-enum": {
-            "modelAsString": false
-          }
-        },
-        "created": {
-          "type": "integer",
-          "format": "unixtime",
-          "description": "The first timestamp associated with generation activity for this completions response,\nrepresented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.",
-          "readOnly": true
-        },
-        "model": {
-          "type": "string",
-          "description": "The model used for the chat completion.",
-          "readOnly": true
-        },
-        "choices": {
-          "type": "array",
-          "description": "An update to the collection of completion choices associated with this completions response.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.",
-          "minItems": 1,
-          "items": {
-            "$ref": "#/definitions/StreamingChatChoiceUpdate"
-          },
-          "readOnly": true,
-          "x-ms-identifiers": []
-        },
-        "usage": {
-          "$ref": "#/definitions/CompletionsUsage",
-          "description": "Usage information for tokens processed and generated as part of this completions operation.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "id",
-        "object",
-        "created",
-        "model",
-        "choices"
-      ]
-    },
-    "StreamingChatResponseMessageUpdate": {
-      "type": "object",
-      "description": "A representation of a chat message update as received in a streaming response.",
-      "properties": {
-        "role": {
-          "$ref": "#/definitions/ChatRole",
-          "description": "The chat role associated with the message. If present, should always be 'assistant'",
-          "readOnly": true
-        },
-        "content": {
-          "type": "string",
-          "description": "The content of the message.",
-          "readOnly": true
-        },
-        "reasoning_content": {
-          "type": "string",
-          "description": "The reasoning content the model used for generating the response",
-          "readOnly": true
-        },
-        "tool_calls": {
-          "type": "array",
-          "description": "The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat\ncompletions request to resolve as configured.",
-          "items": {
-            "$ref": "#/definitions/StreamingChatResponseToolCallUpdate"
-          },
-          "readOnly": true,
-          "x-ms-client-name": "toolCalls"
-        }
-      }
-    },
-    "StreamingChatResponseToolCallUpdate": {
-      "type": "object",
-      "description": "An update to the function tool call information requested by the AI model.",
-      "properties": {
-        "id": {
-          "type": "string",
-          "description": "The ID of the tool call.",
-          "readOnly": true
-        },
-        "function": {
-          "$ref": "#/definitions/FunctionCall",
-          "description": "Updates to the function call requested by the AI model.",
-          "readOnly": true
-        }
-      },
-      "required": [
-        "id",
-        "function"
-      ]
-    },
-    "UserSecurityContext": {
-      "type": "object",
-      "description": "User security context contains several parameters that describe the AI application itself, and the end user that interacts with the AI application. \nThese fields assist your security operations teams to investigate and mitigate security incidents by providing a comprehensive approach to protecting your AI applications. \n[Learn more](https://aka.ms/TP4AI/Documentation/EndUserContext) about protecting AI applications using Microsoft Defender for Cloud.",
-      "properties": {
-        "application_name": {
-          "type": "string",
-          "description": "The name of the application. Sensitive personal information should not be included in this field.",
-          "maxLength": 100
-        },
-        "end_user_id": {
-          "type": "string",
-          "description": "This identifier is the Microsoft Entra ID (formerly Azure Active Directory) user object ID used to authenticate end-users within the generative AI application. Sensitive personal information should not be included in this field.",
-          "minLength": 36,
-          "maxLength": 36,
-          "pattern": "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
-        },
-        "end_user_tenant_id": {
-          "type": "string",
-          "description": "The Microsoft 365 tenant ID the end user belongs to. It's required when the generative AI application is multi-tenant.",
-          "minLength": 36,
-          "maxLength": 36,
-          "pattern": "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
-        },
-        "source_ip": {
-          "type": "string",
-          "description": "Captures the original client's IP address, accepting both IPv4 and IPv6 formats.",
-          "minLength": 2,
-          "maxLength": 45
-        }
-      }
-    }
-  },
-  "parameters": {
-    "Azure.Core.Foundations.ApiVersionParameter": {
-      "name": "api-version",
-      "in": "query",
-      "description": "The API version to use for this operation.",
-      "required": true,
-      "type": "string",
-      "minLength": 1,
-      "x-ms-parameter-location": "method",
-      "x-ms-client-name": "apiVersion"
-    }
-  }
-}