diff --git a/.vscode/cspell.json b/.vscode/cspell.json index 800eaef4b22c..490349022c7a 100644 --- a/.vscode/cspell.json +++ b/.vscode/cspell.json @@ -330,6 +330,7 @@ "Mockito", "Mordor", "mosca", + "mpga", "msal", "msix", "MSRC", diff --git a/eng/code-quality-reports/src/main/resources/spotbugs/spotbugs-exclude.xml b/eng/code-quality-reports/src/main/resources/spotbugs/spotbugs-exclude.xml index 4fd5b0cf37d3..3586a122fa77 100644 --- a/eng/code-quality-reports/src/main/resources/spotbugs/spotbugs-exclude.xml +++ b/eng/code-quality-reports/src/main/resources/spotbugs/spotbugs-exclude.xml @@ -2697,4 +2697,18 @@ + + + + + + + + + + + + + + diff --git a/sdk/openai/azure-ai-openai/CHANGELOG.md b/sdk/openai/azure-ai-openai/CHANGELOG.md index c82b24b6462f..d327aded5c9b 100644 --- a/sdk/openai/azure-ai-openai/CHANGELOG.md +++ b/sdk/openai/azure-ai-openai/CHANGELOG.md @@ -4,6 +4,10 @@ ### Features Added +- Added support for the `Whisper` endpoints. +- Translation and transcription of audio files are now available. +- These features are available in both Azure and non-Azure OpenAI. + ### Breaking Changes ### Bugs Fixed diff --git a/sdk/openai/azure-ai-openai/README.md b/sdk/openai/azure-ai-openai/README.md index 2371d9320b52..b0a33e95d9af 100644 --- a/sdk/openai/azure-ai-openai/README.md +++ b/sdk/openai/azure-ai-openai/README.md @@ -19,6 +19,8 @@ For concrete examples you can have a look at the following links. Some of the mo * [Streaming chat completions sample](#streaming-chat-completions "Streaming chat completions") * [Embeddings sample](#text-embeddings "Text Embeddings") * [Image Generation sample](#image-generation "Image Generation") +* [Audio Transcription sample](#audio-transcription "Audio Transcription") +* [Audio Translation sample](#audio-translation "Audio Translation") If you want to see the full code for these snippets check out our [samples folder][samples_folder]. @@ -150,6 +152,8 @@ The following sections provide several code snippets covering some of the most c * [Streaming chat completions sample](#streaming-chat-completions "Streaming chat completions") * [Embeddings sample](#text-embeddings "Text Embeddings") * [Image Generation sample](#image-generation "Image Generation") +* [Audio Transcription sample](#audio-transcription "Audio Transcription") +* [Audio Translation sample](#audio-translation "Audio Translation") ### Text completions @@ -286,6 +290,44 @@ for (ImageLocation imageLocation : images.getData()) { For a complete sample example, see sample [Image Generation][sample_image_generation]. +### Audio Transcription +The OpenAI service now supports `audio transcription` with the introduction of the `Whisper` models. +The following code snippet shows how to use the service to transcribe audio. + +```java readme-sample-audioTranscription +String fileName = "{your-file-name}"; +Path filePath = Paths.get("{your-file-path}" + fileName); + +byte[] file = BinaryData.fromFile(filePath).toBytes(); +AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + +AudioTranscription transcription = client.getAudioTranscription("{deploymentOrModelId}", fileName, transcriptionOptions); + +System.out.println("Transcription: " + transcription.getText()); +``` +For a complete sample, see [Audio Transcription][sample_audio_transcription]. +Please refer to the service documentation for a conceptual discussion of [Whisper][microsoft_docs_whisper_model].
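The client shown above also exposes a plain-text variant, `getAudioTranscriptionText`, which accepts the `TEXT`, `SRT`, and `VTT` response formats and returns the transcript as a `String` (for example, ready-to-use subtitles). The snippet below is a minimal sketch rather than a shipped sample, assuming the same `client` and file-path placeholders as above:

```java
String fileName = "{your-file-name}";
Path filePath = Paths.get("{your-file-path}" + fileName);

byte[] file = BinaryData.fromFile(filePath).toBytes();
// Request the transcript as SRT subtitles; AudioTranscriptionFormat.TEXT and
// AudioTranscriptionFormat.VTT are the other formats accepted by this method.
AudioTranscriptionOptions srtOptions = new AudioTranscriptionOptions(file)
    .setResponseFormat(AudioTranscriptionFormat.SRT);

String srtTranscript = client.getAudioTranscriptionText("{deploymentOrModelId}", fileName, srtOptions);

System.out.println("SRT transcript: " + srtTranscript);
```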
+ +### Audio Translation +The OpenAI service now supports `audio translation` with the introduction of the `Whisper` models. +The following code snippet shows how to use the service to translate audio. + +```java readme-sample-audioTranslation +String fileName = "{your-file-name}"; +Path filePath = Paths.get("{your-file-path}" + fileName); + +byte[] file = BinaryData.fromFile(filePath).toBytes(); +AudioTranslationOptions translationOptions = new AudioTranslationOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + +AudioTranscription translation = client.getAudioTranslation("{deploymentOrModelId}", fileName, translationOptions); + +System.out.println("Translation: " + translation.getText()); +``` +For a complete sample, see [Audio Translation][sample_audio_translation]. +Please refer to the service documentation for a conceptual discussion of [Whisper][microsoft_docs_whisper_model]. + ## Troubleshooting ### Enable client logging You can set the `AZURE_LOG_LEVEL` environment variable to view logging statements made in the client library. For @@ -327,6 +369,7 @@ For details on contributing to this repository, see the [contributing guide](htt [logLevels]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/core/azure-core/src/main/java/com/azure/core/util/logging/ClientLogger.java [microsoft_docs_openai_completion]: https://learn.microsoft.com/azure/cognitive-services/openai/how-to/completions [microsoft_docs_openai_embedding]: https://learn.microsoft.com/azure/cognitive-services/openai/concepts/understand-embeddings +[microsoft_docs_whisper_model]: https://learn.microsoft.com/azure/ai-services/openai/whisper-quickstart?tabs=command-line [non_azure_openai_authentication]: https://platform.openai.com/docs/api-reference/authentication [performance_tuning]: https://github.com/Azure/azure-sdk-for-java/wiki/Performance-Tuning [product_documentation]: https://azure.microsoft.com/services/ @@ -342,6 +385,8 @@ For details on contributing to this repository, see the [contributing guide](htt [sample_get_completions_streaming]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetCompletionsStreamSample.java [sample_get_embedding]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetEmbeddingsSample.java [sample_image_generation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetImagesSample.java +[sample_audio_transcription]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java +[sample_audio_translation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java [openai_client_async]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIAsyncClient.java [openai_client_builder]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClientBuilder.java [openai_client_sync]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClient.java diff --git a/sdk/openai/azure-ai-openai/assets.json b/sdk/openai/azure-ai-openai/assets.json index 
4a830f321b44..beb6b5b76cff 100644 --- a/sdk/openai/azure-ai-openai/assets.json +++ b/sdk/openai/azure-ai-openai/assets.json @@ -2,5 +2,5 @@ "AssetsRepo": "Azure/azure-sdk-assets", "AssetsRepoPrefixPath": "java", "TagPrefix": "java/openai/azure-ai-openai", - "Tag": "java/openai/azure-ai-openai_57107e7a09" + "Tag": "java/openai/azure-ai-openai_3c34d9f076" } diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIAsyncClient.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIAsyncClient.java index e07549aaa744..aa8e0bea1bf8 100644 --- a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIAsyncClient.java +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIAsyncClient.java @@ -3,10 +3,18 @@ // Code generated by Microsoft (R) AutoRest Code Generator. package com.azure.ai.openai; +import static com.azure.core.util.FluxUtil.monoError; + import com.azure.ai.openai.implementation.CompletionsUtils; +import com.azure.ai.openai.implementation.MultipartDataHelper; +import com.azure.ai.openai.implementation.MultipartDataSerializationResult; import com.azure.ai.openai.implementation.NonAzureOpenAIClientImpl; import com.azure.ai.openai.implementation.OpenAIClientImpl; import com.azure.ai.openai.implementation.OpenAIServerSentEvents; +import com.azure.ai.openai.models.AudioTranscription; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.ai.openai.models.AudioTranslationOptions; import com.azure.ai.openai.models.ChatCompletions; import com.azure.ai.openai.models.ChatCompletionsOptions; import com.azure.ai.openai.models.Completions; @@ -24,12 +32,16 @@ import com.azure.core.exception.HttpResponseException; import com.azure.core.exception.ResourceModifiedException; import com.azure.core.exception.ResourceNotFoundException; +import com.azure.core.http.HttpHeaderName; import com.azure.core.http.rest.RequestOptions; import com.azure.core.http.rest.Response; import com.azure.core.util.BinaryData; import com.azure.core.util.FluxUtil; +import com.azure.core.util.logging.ClientLogger; import com.azure.core.util.polling.PollerFlux; import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; @@ -39,6 +51,8 @@ public final class OpenAIAsyncClient { @Generated private final OpenAIClientImpl serviceClient; + private static final ClientLogger LOGGER = new ClientLogger(OpenAIAsyncClient.class); + private final NonAzureOpenAIClientImpl openAIServiceClient; /** @@ -657,6 +671,18 @@ PollerFlux beginBeginAzureBatchImageGeneration( * violence (Optional): (recursive schema, see violence above) * hate (Optional): (recursive schema, see hate above) * self_harm (Optional): (recursive schema, see self_harm above) + * error (Optional): { + * code: String (Required) + * message: String (Required) + * target: String (Optional) + * details (Optional): [ + * (recursive schema, see above) + * ] + * innererror (Optional): { + * code: String (Optional) + * innererror (Optional): (recursive schema, see innererror above) + * } + * } * } * } * ] @@ -694,4 +720,512 @@ Mono> getChatCompletionsWithAzureExtensionsWithResponse( return this.serviceClient.getChatCompletionsWithAzureExtensionsWithResponseAsync( deploymentOrModelName, chatCompletionsOptions, requestOptions); } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. 
Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

Request Body Schema + * + *

{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+ * + *

Response Body Schema + * + *

{@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response} on + * successful completion of {@link Mono}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranscriptionAsResponseObjectWithResponse( + String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + return this.serviceClient.getAudioTranscriptionAsResponseObjectWithResponseAsync( + deploymentOrModelName, audioTranscriptionOptions, requestOptions); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio file data. Audio will be transcribed in + * the written language corresponding to the language it was spoken in. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param fileName The file name that is represented in the {@code file} field of {@link AudioTranscriptionOptions} + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return {@link AudioTranscription} transcribed text and associated metadata from provided spoken audio data on + * successful completion of {@link Mono}. 
+ */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono getAudioTranscription( + String deploymentOrModelName, String fileName, AudioTranscriptionOptions audioTranscriptionOptions) { + // checking allowed formats for a JSON response + List acceptedFormats = new ArrayList<>(); + acceptedFormats.add(AudioTranscriptionFormat.JSON); + acceptedFormats.add(AudioTranscriptionFormat.VERBOSE_JSON); + if (!acceptedFormats.contains(audioTranscriptionOptions.getResponseFormat())) { + return monoError( + LOGGER, new IllegalArgumentException("This operation does not support the requested audio format")); + } + // embedding the `model` in the request for non-Azure case + if (this.openAIServiceClient != null) { + audioTranscriptionOptions.setModel(deploymentOrModelName); + } + MultipartDataHelper helper = new MultipartDataHelper(); + MultipartDataSerializationResult result = helper.serializeRequest(audioTranscriptionOptions, fileName); + String multipartBoundary = helper.getBoundary(); + RequestOptions requestOptions = new RequestOptions(); + requestOptions + .setHeader(HttpHeaderName.CONTENT_TYPE, "multipart/form-data;" + " boundary=" + multipartBoundary) + .setHeader(HttpHeaderName.CONTENT_LENGTH, String.valueOf(result.getDataLength())); + Mono> response = + openAIServiceClient != null + ? this.openAIServiceClient.getAudioTranscriptionAsResponseObjectWithResponseAsync( + deploymentOrModelName, result.getData(), requestOptions) + : this.serviceClient.getAudioTranscriptionAsResponseObjectWithResponseAsync( + deploymentOrModelName, result.getData(), requestOptions); + return response.map(binaryData -> binaryData.getValue().toObject(AudioTranscription.class)); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio file data. Audio will be transcribed in + * the written language corresponding to the language it was spoken in. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param fileName The file name that is represented in the {@code file} field of {@link AudioTranscriptionOptions} + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return transcribed text and associated metadata from provided spoken audio data on successful completion of + * {@link Mono}. 
+ */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono getAudioTranscriptionText( + String deploymentOrModelName, String fileName, AudioTranscriptionOptions audioTranscriptionOptions) { + // checking allowed formats for a plain text response + List acceptedFormats = new ArrayList<>(); + acceptedFormats.add(AudioTranscriptionFormat.TEXT); + acceptedFormats.add(AudioTranscriptionFormat.VTT); + acceptedFormats.add(AudioTranscriptionFormat.SRT); + if (!acceptedFormats.contains(audioTranscriptionOptions.getResponseFormat())) { + return monoError( + LOGGER, new IllegalArgumentException("This operation does not support the requested audio format")); + } + // embedding the `model` in the request for non-Azure case + if (this.openAIServiceClient != null) { + audioTranscriptionOptions.setModel(deploymentOrModelName); + } + MultipartDataHelper helper = new MultipartDataHelper(); + MultipartDataSerializationResult result = helper.serializeRequest(audioTranscriptionOptions, fileName); + String multipartBoundary = helper.getBoundary(); + RequestOptions requestOptions = new RequestOptions(); + requestOptions + .setHeader(HttpHeaderName.CONTENT_TYPE, "multipart/form-data;" + " boundary=" + multipartBoundary) + .setHeader(HttpHeaderName.CONTENT_LENGTH, String.valueOf(result.getDataLength())); + Mono> response = + openAIServiceClient != null + ? this.openAIServiceClient.getAudioTranscriptionAsPlainTextWithResponseAsync( + deploymentOrModelName, result.getData(), requestOptions) + : this.serviceClient.getAudioTranscriptionAsPlainTextWithResponseAsync( + deploymentOrModelName, result.getData(), requestOptions); + return response.map(binaryData -> binaryData.getValue().toString()); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio file data. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param fileName The file name that is represented in the {@code file} field of {@link AudioTranslationOptions} + * @param audioTranslationOptions The configuration information for an audio translation request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return {@link AudioTranscription} english language transcribed text and associated metadata from provided spoken + * audio file data on successful completion of {@link Mono}. 
+ */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono getAudioTranslation( + String deploymentOrModelName, String fileName, AudioTranslationOptions audioTranslationOptions) { + // checking allowed formats for a JSON response + List acceptedFormats = new ArrayList<>(); + acceptedFormats.add(AudioTranscriptionFormat.JSON); + acceptedFormats.add(AudioTranscriptionFormat.VERBOSE_JSON); + if (!acceptedFormats.contains(audioTranslationOptions.getResponseFormat())) { + return monoError( + LOGGER, new IllegalArgumentException("This operation does not support the requested audio format")); + } + // embedding the `model` in the request for non-Azure case + if (this.openAIServiceClient != null) { + audioTranslationOptions.setModel(deploymentOrModelName); + } + MultipartDataHelper helper = new MultipartDataHelper(); + MultipartDataSerializationResult result = helper.serializeRequest(audioTranslationOptions, fileName); + String multipartBoundary = helper.getBoundary(); + RequestOptions requestOptions = new RequestOptions(); + requestOptions + .setHeader(HttpHeaderName.CONTENT_TYPE, "multipart/form-data;" + " boundary=" + multipartBoundary) + .setHeader(HttpHeaderName.CONTENT_LENGTH, String.valueOf(result.getDataLength())); + Mono> response = + openAIServiceClient != null + ? this.openAIServiceClient.getAudioTranslationAsResponseObjectWithResponseAsync( + deploymentOrModelName, result.getData(), requestOptions) + : this.serviceClient.getAudioTranslationAsResponseObjectWithResponseAsync( + deploymentOrModelName, result.getData(), requestOptions); + return response.map(binaryData -> binaryData.getValue().toObject(AudioTranscription.class)); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio file data. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param fileName The file name that is represented in the {@code file} field of {@link AudioTranslationOptions}. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return english language transcribed text and associated metadata from provided spoken audio file data on + * successful completion of {@link Mono}. 
+ */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono getAudioTranslationText( + String deploymentOrModelName, String fileName, AudioTranslationOptions audioTranslationOptions) { + // checking allowed formats for a plain text response + List acceptedFormats = new ArrayList<>(); + acceptedFormats.add(AudioTranscriptionFormat.TEXT); + acceptedFormats.add(AudioTranscriptionFormat.VTT); + acceptedFormats.add(AudioTranscriptionFormat.SRT); + if (!acceptedFormats.contains(audioTranslationOptions.getResponseFormat())) { + return monoError( + LOGGER, new IllegalArgumentException("This operation does not support the requested audio format")); + } + // embedding the `model` in the request for non-Azure case + if (this.openAIServiceClient != null) { + audioTranslationOptions.setModel(deploymentOrModelName); + } + MultipartDataHelper helper = new MultipartDataHelper(); + MultipartDataSerializationResult result = helper.serializeRequest(audioTranslationOptions, fileName); + String multipartBoundary = helper.getBoundary(); + RequestOptions requestOptions = new RequestOptions(); + requestOptions + .setHeader(HttpHeaderName.CONTENT_TYPE, "multipart/form-data;" + " boundary=" + multipartBoundary) + .setHeader(HttpHeaderName.CONTENT_LENGTH, String.valueOf(result.getDataLength())); + Mono> response = + openAIServiceClient != null + ? this.openAIServiceClient.getAudioTranslationAsPlainTextWithResponseAsync( + deploymentOrModelName, result.getData(), requestOptions) + : this.serviceClient.getAudioTranslationAsPlainTextWithResponseAsync( + deploymentOrModelName, result.getData(), requestOptions); + return response.map(binaryData -> binaryData.getValue().toString()); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

Request Body Schema + * + *

{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+ * + *

Response Body Schema + * + *

{@code
+     * String
+     * }
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response} on + * successful completion of {@link Mono}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranscriptionAsPlainTextWithResponse( + String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + return this.serviceClient.getAudioTranscriptionAsPlainTextWithResponseAsync( + deploymentOrModelName, audioTranscriptionOptions, requestOptions); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

Request Body Schema + * + *

{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+ * + *

Response Body Schema + * + *

{@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response} on successful completion of {@link Mono}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranslationAsResponseObjectWithResponse( + String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + return this.serviceClient.getAudioTranslationAsResponseObjectWithResponseAsync( + deploymentOrModelName, audioTranslationOptions, requestOptions); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

Request Body Schema + * + *

{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+ * + *

Response Body Schema + * + *

{@code
+     * String
+     * }
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response} on successful completion of {@link Mono}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranslationAsPlainTextWithResponse( + String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + return this.serviceClient.getAudioTranslationAsPlainTextWithResponseAsync( + deploymentOrModelName, audioTranslationOptions, requestOptions); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return transcribed text and associated metadata from provided spoken audio data on successful completion of + * {@link Mono}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono getAudioTranscriptionAsResponseObject( + String deploymentOrModelName, AudioTranscriptionOptions audioTranscriptionOptions) { + // Generated convenience method for getAudioTranscriptionAsResponseObjectWithResponse + RequestOptions requestOptions = new RequestOptions(); + return getAudioTranscriptionAsResponseObjectWithResponse( + deploymentOrModelName, BinaryData.fromObject(audioTranscriptionOptions), requestOptions) + .flatMap(FluxUtil::toMono) + .map(protocolMethodData -> protocolMethodData.toObject(AudioTranscription.class)); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. 
+ * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return transcribed text and associated metadata from provided spoken audio data on successful completion of + * {@link Mono}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono getAudioTranscriptionAsPlainText( + String deploymentOrModelName, AudioTranscriptionOptions audioTranscriptionOptions) { + // Generated convenience method for getAudioTranscriptionAsPlainTextWithResponse + RequestOptions requestOptions = new RequestOptions(); + return getAudioTranscriptionAsPlainTextWithResponse( + deploymentOrModelName, BinaryData.fromObject(audioTranscriptionOptions), requestOptions) + .flatMap(FluxUtil::toMono) + .map(protocolMethodData -> protocolMethodData.toObject(String.class)); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return english language transcribed text and associated metadata from provided spoken audio data on successful + * completion of {@link Mono}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono getAudioTranslationAsResponseObject( + String deploymentOrModelName, AudioTranslationOptions audioTranslationOptions) { + // Generated convenience method for getAudioTranslationAsResponseObjectWithResponse + RequestOptions requestOptions = new RequestOptions(); + return getAudioTranslationAsResponseObjectWithResponse( + deploymentOrModelName, BinaryData.fromObject(audioTranslationOptions), requestOptions) + .flatMap(FluxUtil::toMono) + .map(protocolMethodData -> protocolMethodData.toObject(AudioTranscription.class)); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. 
+ * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return english language transcribed text and associated metadata from provided spoken audio data on successful + * completion of {@link Mono}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono getAudioTranslationAsPlainText( + String deploymentOrModelName, AudioTranslationOptions audioTranslationOptions) { + // Generated convenience method for getAudioTranslationAsPlainTextWithResponse + RequestOptions requestOptions = new RequestOptions(); + return getAudioTranslationAsPlainTextWithResponse( + deploymentOrModelName, BinaryData.fromObject(audioTranslationOptions), requestOptions) + .flatMap(FluxUtil::toMono) + .map(protocolMethodData -> protocolMethodData.toObject(String.class)); + } } diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClient.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClient.java index 7e171fc56a0d..5d17c266cddd 100644 --- a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClient.java +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClient.java @@ -4,9 +4,15 @@ package com.azure.ai.openai; import com.azure.ai.openai.implementation.CompletionsUtils; +import com.azure.ai.openai.implementation.MultipartDataHelper; +import com.azure.ai.openai.implementation.MultipartDataSerializationResult; import com.azure.ai.openai.implementation.NonAzureOpenAIClientImpl; import com.azure.ai.openai.implementation.OpenAIClientImpl; import com.azure.ai.openai.implementation.OpenAIServerSentEvents; +import com.azure.ai.openai.models.AudioTranscription; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.ai.openai.models.AudioTranslationOptions; import com.azure.ai.openai.models.ChatCompletions; import com.azure.ai.openai.models.ChatCompletionsOptions; import com.azure.ai.openai.models.Completions; @@ -24,6 +30,7 @@ import com.azure.core.exception.HttpResponseException; import com.azure.core.exception.ResourceModifiedException; import com.azure.core.exception.ResourceNotFoundException; +import com.azure.core.http.HttpHeaderName; import com.azure.core.http.rest.RequestOptions; import com.azure.core.http.rest.Response; import com.azure.core.util.BinaryData; @@ -31,6 +38,8 @@ import com.azure.core.util.logging.ClientLogger; import com.azure.core.util.polling.SyncPoller; import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; import reactor.core.publisher.Flux; /** Initializes a new instance of the synchronous OpenAIClient type. 
*/ @@ -656,6 +665,18 @@ SyncPoller beginBeginAzureBatchImageGeneration( * violence (Optional): (recursive schema, see violence above) * hate (Optional): (recursive schema, see hate above) * self_harm (Optional): (recursive schema, see self_harm above) + * error (Optional): { + * code: String (Required) + * message: String (Required) + * target: String (Optional) + * details (Optional): [ + * (recursive schema, see above) + * ] + * innererror (Optional): { + * code: String (Optional) + * innererror (Optional): (recursive schema, see innererror above) + * } + * } * } * } * ] @@ -693,4 +714,503 @@ Response getChatCompletionsWithAzureExtensionsWithResponse( return this.serviceClient.getChatCompletionsWithAzureExtensionsWithResponse( deploymentOrModelName, chatCompletionsOptions, requestOptions); } + + /** + * Gets transcribed text and associated metadata from provided spoken audio file data. Audio will be transcribed in + * the written language corresponding to the language it was spoken in. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param fileName The file name that is represented in the {@code file} field of {@link AudioTranscriptionOptions}. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return {@link AudioTranscription} transcribed text and associated metadata from provided spoken audio data. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public AudioTranscription getAudioTranscription( + String deploymentOrModelName, String fileName, AudioTranscriptionOptions audioTranscriptionOptions) { + // checking allowed formats for a JSON response + List acceptedFormats = new ArrayList<>(); + acceptedFormats.add(AudioTranscriptionFormat.JSON); + acceptedFormats.add(AudioTranscriptionFormat.VERBOSE_JSON); + if (!acceptedFormats.contains(audioTranscriptionOptions.getResponseFormat())) { + throw LOGGER.logExceptionAsError( + new IllegalArgumentException("This operation does not support the requested audio format")); + } + // embedding the `model` in the request for non-Azure case + if (this.openAIServiceClient != null) { + audioTranscriptionOptions.setModel(deploymentOrModelName); + } + MultipartDataHelper helper = new MultipartDataHelper(); + MultipartDataSerializationResult result = helper.serializeRequest(audioTranscriptionOptions, fileName); + String multipartBoundary = helper.getBoundary(); + RequestOptions requestOptions = new RequestOptions(); + requestOptions + .setHeader(HttpHeaderName.CONTENT_TYPE, "multipart/form-data;" + " boundary=" + multipartBoundary) + .setHeader(HttpHeaderName.CONTENT_LENGTH, String.valueOf(result.getDataLength())); + Response response = + openAIServiceClient != null + ? 
this.openAIServiceClient.getAudioTranscriptionAsPlainTextWithResponse( + deploymentOrModelName, result.getData(), requestOptions) + : this.serviceClient.getAudioTranscriptionAsPlainTextWithResponse( + deploymentOrModelName, result.getData(), requestOptions); + return response.getValue().toObject(AudioTranscription.class); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio file data. Audio will be transcribed in + * the written language corresponding to the language it was spoken in. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param fileName The file name that is represented in the {@code file} field of {@link AudioTranscriptionOptions}. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return transcribed text and associated metadata from provided spoken audio data. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public String getAudioTranscriptionText( + String deploymentOrModelName, String fileName, AudioTranscriptionOptions audioTranscriptionOptions) { + // checking allowed formats for a plain text response + List acceptedFormats = new ArrayList<>(); + acceptedFormats.add(AudioTranscriptionFormat.TEXT); + acceptedFormats.add(AudioTranscriptionFormat.VTT); + acceptedFormats.add(AudioTranscriptionFormat.SRT); + if (!acceptedFormats.contains(audioTranscriptionOptions.getResponseFormat())) { + throw LOGGER.logExceptionAsError( + new IllegalArgumentException("This operation does not support the requested audio format")); + } + // embedding the `model` in the request for non-Azure case + if (this.openAIServiceClient != null) { + audioTranscriptionOptions.setModel(deploymentOrModelName); + } + MultipartDataHelper helper = new MultipartDataHelper(); + MultipartDataSerializationResult result = helper.serializeRequest(audioTranscriptionOptions, fileName); + String multipartBoundary = helper.getBoundary(); + RequestOptions requestOptions = new RequestOptions(); + requestOptions + .setHeader(HttpHeaderName.CONTENT_TYPE, "multipart/form-data;" + " boundary=" + multipartBoundary) + .setHeader(HttpHeaderName.CONTENT_LENGTH, String.valueOf(result.getDataLength())); + Response response = + openAIServiceClient != null + ? this.openAIServiceClient.getAudioTranscriptionAsPlainTextWithResponse( + deploymentOrModelName, result.getData(), requestOptions) + : this.serviceClient.getAudioTranscriptionAsPlainTextWithResponse( + deploymentOrModelName, result.getData(), requestOptions); + return response.getValue().toString(); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio file data. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. 
+ * @param fileName The file name that is represented in the {@code file} field of {@link AudioTranslationOptions}. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return {@link AudioTranscription} english language transcribed text and associated metadata from provided spoken + * audio file data. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public AudioTranscription getAudioTranslation( + String deploymentOrModelName, String fileName, AudioTranslationOptions audioTranslationOptions) { + // checking allowed formats for a JSON response + List acceptedFormats = new ArrayList<>(); + acceptedFormats.add(AudioTranscriptionFormat.JSON); + acceptedFormats.add(AudioTranscriptionFormat.VERBOSE_JSON); + if (!acceptedFormats.contains(audioTranslationOptions.getResponseFormat())) { + throw LOGGER.logExceptionAsError( + new IllegalArgumentException("This operation does not support the requested audio format")); + } + // embedding the `model` in the request for non-Azure case + if (this.openAIServiceClient != null) { + audioTranslationOptions.setModel(deploymentOrModelName); + } + MultipartDataHelper helper = new MultipartDataHelper(); + MultipartDataSerializationResult result = helper.serializeRequest(audioTranslationOptions, fileName); + String multipartBoundary = helper.getBoundary(); + RequestOptions requestOptions = new RequestOptions(); + requestOptions + .setHeader(HttpHeaderName.CONTENT_TYPE, "multipart/form-data;" + " boundary=" + multipartBoundary) + .setHeader(HttpHeaderName.CONTENT_LENGTH, String.valueOf(result.getDataLength())); + Response response = + openAIServiceClient != null + ? this.openAIServiceClient.getAudioTranslationAsPlainTextWithResponse( + deploymentOrModelName, result.getData(), requestOptions) + : this.serviceClient.getAudioTranslationAsPlainTextWithResponse( + deploymentOrModelName, result.getData(), requestOptions); + return response.getValue().toObject(AudioTranscription.class); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio file data. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param fileName The file name that is represented in the {@code file} field of {@link AudioTranslationOptions}. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. 
+ * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return english language transcribed text and associated metadata from provided spoken audio file data. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public String getAudioTranslationText( + String deploymentOrModelName, String fileName, AudioTranslationOptions audioTranslationOptions) { + // checking allowed formats for a plain text response + List acceptedFormats = new ArrayList<>(); + acceptedFormats.add(AudioTranscriptionFormat.TEXT); + acceptedFormats.add(AudioTranscriptionFormat.VTT); + acceptedFormats.add(AudioTranscriptionFormat.SRT); + if (!acceptedFormats.contains(audioTranslationOptions.getResponseFormat())) { + throw LOGGER.logExceptionAsError( + new IllegalArgumentException("This operation does not support the requested audio format")); + } + // embedding the `model` in the request for non-Azure case + if (this.openAIServiceClient != null) { + audioTranslationOptions.setModel(deploymentOrModelName); + } + MultipartDataHelper helper = new MultipartDataHelper(); + MultipartDataSerializationResult result = helper.serializeRequest(audioTranslationOptions, fileName); + String multipartBoundary = helper.getBoundary(); + RequestOptions requestOptions = new RequestOptions(); + requestOptions + .setHeader(HttpHeaderName.CONTENT_TYPE, "multipart/form-data;" + " boundary=" + multipartBoundary) + .setHeader(HttpHeaderName.CONTENT_LENGTH, String.valueOf(result.getDataLength())); + Response response = + openAIServiceClient != null + ? this.openAIServiceClient.getAudioTranslationAsPlainTextWithResponse( + deploymentOrModelName, result.getData(), requestOptions) + : this.serviceClient.getAudioTranslationAsPlainTextWithResponse( + deploymentOrModelName, result.getData(), requestOptions); + return response.getValue().toString(); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

Request Body Schema + * + *

{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+ * + *

Response Body Schema + * + *

{@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranscriptionAsResponseObjectWithResponse( + String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + return this.serviceClient.getAudioTranscriptionAsResponseObjectWithResponse( + deploymentOrModelName, audioTranscriptionOptions, requestOptions); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

Request Body Schema + * + *

{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+ * + *

Response Body Schema + * + *

{@code
+     * String
+     * }
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranscriptionAsPlainTextWithResponse( + String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + return this.serviceClient.getAudioTranscriptionAsPlainTextWithResponse( + deploymentOrModelName, audioTranscriptionOptions, requestOptions); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }
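+     *
+     * A minimal usage sketch (illustrative deployment name; {@code fileBytes} holds the source-language audio).
+     * The translated output text is English, and the response shape matches the transcription schema above.
+     *
+     * {@code
+     * AudioTranslationOptions options = new AudioTranslationOptions(fileBytes)
+     *     .setResponseFormat(AudioTranscriptionFormat.JSON);
+     * AudioTranscription translation = client.getAudioTranslationAsResponseObjectWithResponse(
+     *     "my-whisper-deployment", BinaryData.fromObject(options), new RequestOptions())
+     *     .getValue().toObject(AudioTranscription.class);
+     * }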
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranslationAsResponseObjectWithResponse( + String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + return this.serviceClient.getAudioTranslationAsResponseObjectWithResponse( + deploymentOrModelName, audioTranslationOptions, requestOptions); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * String
+     * }
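+     *
+     * A short usage sketch (illustrative names; the VTT constant of the response format enum is assumed, matching
+     * one of the plain-text formats listed in the request schema above):
+     *
+     * {@code
+     * AudioTranslationOptions options = new AudioTranslationOptions(fileBytes)
+     *     .setResponseFormat(AudioTranscriptionFormat.VTT);
+     * String vtt = client.getAudioTranslationAsPlainTextWithResponse(
+     *     "my-whisper-deployment", BinaryData.fromObject(options), new RequestOptions())
+     *     .getValue().toObject(String.class);
+     * }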
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranslationAsPlainTextWithResponse( + String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + return this.serviceClient.getAudioTranslationAsPlainTextWithResponse( + deploymentOrModelName, audioTranslationOptions, requestOptions); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return transcribed text and associated metadata from provided spoken audio data. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public AudioTranscription getAudioTranscriptionAsResponseObject( + String deploymentOrModelName, AudioTranscriptionOptions audioTranscriptionOptions) { + // Generated convenience method for getAudioTranscriptionAsResponseObjectWithResponse + RequestOptions requestOptions = new RequestOptions(); + return getAudioTranscriptionAsResponseObjectWithResponse( + deploymentOrModelName, BinaryData.fromObject(audioTranscriptionOptions), requestOptions) + .getValue() + .toObject(AudioTranscription.class); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @throws IllegalArgumentException thrown if parameters fail the validation. 
+ * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return transcribed text and associated metadata from provided spoken audio data. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public String getAudioTranscriptionAsPlainText( + String deploymentOrModelName, AudioTranscriptionOptions audioTranscriptionOptions) { + // Generated convenience method for getAudioTranscriptionAsPlainTextWithResponse + RequestOptions requestOptions = new RequestOptions(); + return getAudioTranscriptionAsPlainTextWithResponse( + deploymentOrModelName, BinaryData.fromObject(audioTranscriptionOptions), requestOptions) + .getValue() + .toObject(String.class); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return english language transcribed text and associated metadata from provided spoken audio data. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public AudioTranscription getAudioTranslationAsResponseObject( + String deploymentOrModelName, AudioTranslationOptions audioTranslationOptions) { + // Generated convenience method for getAudioTranslationAsResponseObjectWithResponse + RequestOptions requestOptions = new RequestOptions(); + return getAudioTranslationAsResponseObjectWithResponse( + deploymentOrModelName, BinaryData.fromObject(audioTranslationOptions), requestOptions) + .getValue() + .toObject(AudioTranscription.class); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. 
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return english language transcribed text and associated metadata from provided spoken audio data. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public String getAudioTranslationAsPlainText( + String deploymentOrModelName, AudioTranslationOptions audioTranslationOptions) { + // Generated convenience method for getAudioTranslationAsPlainTextWithResponse + RequestOptions requestOptions = new RequestOptions(); + return getAudioTranslationAsPlainTextWithResponse( + deploymentOrModelName, BinaryData.fromObject(audioTranslationOptions), requestOptions) + .getValue() + .toObject(String.class); + } } diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIServiceVersion.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIServiceVersion.java index 9844431603fa..3027940ba21f 100644 --- a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIServiceVersion.java +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIServiceVersion.java @@ -21,7 +21,10 @@ public enum OpenAIServiceVersion implements ServiceVersion { V2023_07_01_PREVIEW("2023-07-01-preview"), /** Enum value 2023-08-01-preview. */ - V2023_08_01_PREVIEW("2023-08-01-preview"); + V2023_08_01_PREVIEW("2023-08-01-preview"), + + /** Enum value 2023-09-01-preview. */ + V2023_09_01_PREVIEW("2023-09-01-preview"); private final String version; @@ -41,6 +44,6 @@ public String getVersion() { * @return The latest {@link OpenAIServiceVersion}. */ public static OpenAIServiceVersion getLatest() { - return V2023_08_01_PREVIEW; + return V2023_09_01_PREVIEW; } } diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/MultipartDataHelper.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/MultipartDataHelper.java new file mode 100644 index 000000000000..ecad479f6c95 --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/MultipartDataHelper.java @@ -0,0 +1,214 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.openai.implementation; + +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.ai.openai.models.AudioTranslationOptions; +import com.azure.core.util.BinaryData; +import com.azure.core.util.logging.ClientLogger; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +/** + * Helper class for marshaling {@link AudioTranscriptionOptions} and {@link AudioTranslationOptions} objects to be used + * in multipart HTTP requests according to RFC7578. + */ +public class MultipartDataHelper { + private static final ClientLogger LOGGER = new ClientLogger(MultipartDataHelper.class); + + /** + * Value to be used as part of the divider for the multipart requests. + */ + private final String boundary; + + /** + * The actual part separator in the request. This is obtained by prepending "--" to the "boundary". + */ + private final String partSeparator; + + /** + * The marker for the ending of a multipart request. This is obtained by post-pending "--" to the "partSeparator". 
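+     * For example, with the default boundary {@code "29580623-3d02-4a"}, the part separator is
+     * {@code "--29580623-3d02-4a"} and the end marker is {@code "--29580623-3d02-4a--"}.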
+ */ + private final String endMarker; + + /** + * Charset used for encoding the multipart HTTP request. + */ + private final Charset encoderCharset = StandardCharsets.UTF_8; + + /** + * Line separator for the multipart HTTP request. + */ + private static final String CRLF = "\r\n"; + + /** + * Default constructor used in the code. The boundary is a random value. + */ + public MultipartDataHelper() { + // TODO: We can't use randomly generated UUIDs for now. Generating a test session record won't match the + // newly generated UUID for the test run instance this(UUID.randomUUID().toString().substring(0, 16)); + this("29580623-3d02-4a"); + } + + /** + * Constructor accepting a boundary generator. Used for testing. + * + * @param boundary The value to be used as "boundary". + */ + public MultipartDataHelper(String boundary) { + this.boundary = boundary; + partSeparator = "--" + boundary; + endMarker = partSeparator + "--"; + } + + /** + * Gets the "boundary" value. + * + * @return the "boundary" value. + */ + public String getBoundary() { + return boundary; + } + + /** + * This method marshals the passed request into ready to be sent. + * + * @param requestOptions Object to be marshalled for the multipart HTTP request. + * @param fileName The name of the file that is being sent as a part of this request. + * @param {@link AudioTranscriptionOptions} and {@link AudioTranslationOptions} are the only types supported. + * This represents the type information of the request object. + * @return the marshalled data and its length. + */ + public MultipartDataSerializationResult serializeRequest(T requestOptions, String fileName) { + if (requestOptions instanceof AudioTranslationOptions) { + AudioTranslationOptions audioTranslationOptions = (AudioTranslationOptions) requestOptions; + byte[] file = audioTranslationOptions.getFile(); + List fields = formatAudioTranslationOptions(audioTranslationOptions); + return serializeRequestFields(file, fields, fileName); + } else if (requestOptions instanceof AudioTranscriptionOptions) { + AudioTranscriptionOptions audioTranscriptionOptions = (AudioTranscriptionOptions) requestOptions; + byte[] file = audioTranscriptionOptions.getFile(); + List fields = formatAudioTranscriptionOptions(audioTranscriptionOptions); + return serializeRequestFields(file, fields, fileName); + } else { + throw LOGGER.logThrowableAsError(new IllegalArgumentException( + "Only AudioTranslationOptions and AudioTranscriptionOptions currently supported")); + } + } + + /** + * This helper method marshals the passed request fields. + * + * @param file is the byte[] representation of the file in the request object. + * @param fields a list of the members other than the file in the request object. + * @param fileName the name of the file passed in the "file" field of the request object. + * @return a structure containing the marshalled data and its length. 
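+     * As an illustration, for the default boundary, a hypothetical file name of {@code audio-sample.wav}, and a
+     * single {@code response_format} field, the serialized body has the following shape (lines end with CRLF):
+     * {@code
+     * --29580623-3d02-4a
+     * Content-Disposition: form-data; name="file"; filename="audio-sample.wav"
+     * Content-Type: application/octet-stream
+     *
+     * <raw file bytes>
+     * --29580623-3d02-4a
+     * Content-Disposition: form-data; name="response_format"
+     *
+     * verbose_json
+     * --29580623-3d02-4a--
+     * }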
+ */ + private MultipartDataSerializationResult serializeRequestFields(byte[] file, List fields, String fileName) { + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + + // Multipart preamble + String fileFieldPreamble = partSeparator + + CRLF + "Content-Disposition: form-data; name=\"file\"; filename=\"" + + fileName + "\"" + + CRLF + "Content-Type: application/octet-stream" + CRLF + CRLF; + try { + // Writing the file into the request as a byte stream + byteArrayOutputStream.write(fileFieldPreamble.getBytes(encoderCharset)); + byteArrayOutputStream.write(file); + + // Adding other fields to the request + for (MultipartField field : fields) { + byteArrayOutputStream.write(serializeField(field)); + } + byteArrayOutputStream.write((CRLF + endMarker).getBytes(encoderCharset)); + } catch (IOException e) { + throw new RuntimeException(e); + } + + byte[] totalData = byteArrayOutputStream.toByteArray(); + return new MultipartDataSerializationResult(BinaryData.fromBytes(totalData), totalData.length); + } + + /** + * Adds member fields apart from the file to the multipart HTTP request. + * + * @param audioTranslationOptions The configuration information for an audio translation request. + * @return a list of the fields in the request (except for "file"). + */ + private List formatAudioTranslationOptions(AudioTranslationOptions audioTranslationOptions) { + List fields = new ArrayList<>(); + if (audioTranslationOptions.getResponseFormat() != null) { + fields.add(new MultipartField( + "response_format", + audioTranslationOptions.getResponseFormat().toString())); + } + if (audioTranslationOptions.getModel() != null) { + fields.add(new MultipartField("model", + audioTranslationOptions.getModel() + )); + } + if (audioTranslationOptions.getPrompt() != null) { + fields.add(new MultipartField("prompt", + audioTranslationOptions.getPrompt())); + } + if (audioTranslationOptions.getTemperature() != null) { + fields.add(new MultipartField("temperature", + String.valueOf(audioTranslationOptions.getTemperature()))); + } + return fields; + } + + /** + * Adds member fields apart from the file to the multipart HTTP request. + * + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @return a list of the fields in the request (except for "file"). + */ + private List formatAudioTranscriptionOptions(AudioTranscriptionOptions audioTranscriptionOptions) { + List fields = new ArrayList<>(); + if (audioTranscriptionOptions.getResponseFormat() != null) { + fields.add(new MultipartField("response_format", + audioTranscriptionOptions.getResponseFormat().toString())); + } + if (audioTranscriptionOptions.getModel() != null) { + fields.add(new MultipartField("model", + audioTranscriptionOptions.getModel() + )); + } + if (audioTranscriptionOptions.getPrompt() != null) { + fields.add(new MultipartField("prompt", + audioTranscriptionOptions.getPrompt())); + } + if (audioTranscriptionOptions.getTemperature() != null) { + fields.add(new MultipartField("temperature", + String.valueOf(audioTranscriptionOptions.getTemperature()))); + } + if (audioTranscriptionOptions.getLanguage() != null) { + fields.add(new MultipartField("language", + audioTranscriptionOptions.getLanguage())); + } + return fields; + } + + /** + * This method formats a field for a multipart HTTP request and returns its byte[] representation. + * + * @param field the field of the request to be marshalled. + * @return byte[] representation of a field for a multipart HTTP request. 
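+     * For example, a field with wire name {@code temperature} and value {@code 0.2} is written as a CRLF, the part
+     * separator, a {@code Content-Disposition: form-data; name="temperature"} header, an empty line, and then the
+     * value itself.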
+ */ + private byte[] serializeField(MultipartField field) { + String serialized = CRLF + partSeparator + + CRLF + "Content-Disposition: form-data; name=\"" + + field.getWireName() + "\"" + CRLF + CRLF + + field.getValue(); + + return serialized.getBytes(encoderCharset); + } +} diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/MultipartDataSerializationResult.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/MultipartDataSerializationResult.java new file mode 100644 index 000000000000..1150b879b6b6 --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/MultipartDataSerializationResult.java @@ -0,0 +1,50 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.openai.implementation; + +import com.azure.core.util.BinaryData; + +/** + * This class is used as a stand-in representation of marshalled data to be used in an HTTP multipart request. + */ +public class MultipartDataSerializationResult { + + /** + * Represents the length of the content of this request. The value is to be used for the "Content-Length" header + * of the HTTP request + */ + private final long dataLength; + + /** + * The multipart form data of the request. + */ + private final BinaryData data; + + /** + * Constructor bundling both data and its length + * @param data the multipart form data of the request + * @param contentLength the length of the multipart form data of the request + */ + public MultipartDataSerializationResult(BinaryData data, long contentLength) { + this.dataLength = contentLength; + this.data = data; + } + + /** + * + * @return the result of marshaling a multipart HTTP request + */ + public BinaryData getData() { + return data; + } + + /** + * + * @return the length of a multipart HTTP request data + */ + public long getDataLength() { + return dataLength; + } + +} diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/MultipartField.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/MultipartField.java new file mode 100644 index 000000000000..1ad618b7ceb6 --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/MultipartField.java @@ -0,0 +1,46 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.openai.implementation; + +/** + * A field of a request for a multipart HTTP request. + */ +public class MultipartField { + + /** + * The JSON key name of this field. + */ + private final String wireName; + + /** + * The JSON value of this field. + */ + private final String value; + + /** + * + * @param wireName The JSON key name of this field. + * @param value The JSON value of this field. + */ + public MultipartField(String wireName, String value) { + this.wireName = wireName; + this.value = value; + } + + /** + * + * @return The JSON key name of this field. + */ + public String getWireName() { + return wireName; + } + + /** + * + * @return The JSON value of this field. 
+ */ + public String getValue() { + return value; + } +} diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/NonAzureOpenAIClientImpl.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/NonAzureOpenAIClientImpl.java index 5ecd55ec21b3..8fd0413c128e 100644 --- a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/NonAzureOpenAIClientImpl.java +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/NonAzureOpenAIClientImpl.java @@ -243,6 +243,158 @@ Response generateImageSync( @BodyParam("application/json") BinaryData imageGenerationOptions, RequestOptions requestOptions, Context context); + + @Post("/audio/transcriptions") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Mono> getAudioTranscriptionAsResponseObject( + @HostParam("endpoint") String endpoint, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranscriptionOptions, + RequestOptions requestOptions, + Context context); + + @Post("/audio/transcriptions") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Response getAudioTranscriptionAsResponseObjectSync( + @HostParam("endpoint") String endpoint, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranscriptionOptions, + RequestOptions requestOptions, + Context context); + + @Post("/audio/transcriptions") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Mono> getAudioTranscriptionAsPlainText( + @HostParam("endpoint") String endpoint, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranscriptionOptions, + RequestOptions requestOptions, + Context context); + + @Post("/audio/transcriptions") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Response getAudioTranscriptionAsPlainTextSync( + @HostParam("endpoint") String endpoint, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranscriptionOptions, + RequestOptions requestOptions, + Context context); + + @Post("/audio/translations") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + 
code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Mono> getAudioTranslationAsResponseObject( + @HostParam("endpoint") String endpoint, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranslationOptions, + RequestOptions requestOptions, + Context context); + + @Post("/audio/translations") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Response getAudioTranslationAsResponseObjectSync( + @HostParam("endpoint") String endpoint, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranslationOptions, + RequestOptions requestOptions, + Context context); + + @Post("/audio/translations") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Mono> getAudioTranslationAsPlainText( + @HostParam("endpoint") String endpoint, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranslationOptions, + RequestOptions requestOptions, + Context context); + + @Post("/audio/translations") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Response getAudioTranslationAsPlainTextSync( + @HostParam("endpoint") String endpoint, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranslationOptions, + RequestOptions requestOptions, + Context context); } /** @@ -891,7 +1043,7 @@ public Response generateImageWithResponse( * * @param inputJson JSON submitted by the client * @param modelId The LLM model ID to be injected in the JSON - * @return + * @return an updated version of the JSON with the key "model" and its corresponding value "modelId" added */ private static BinaryData addModelIdJson(BinaryData inputJson, String modelId) throws JsonProcessingException { JsonNode jsonNode = JSON_MAPPER.readTree(inputJson.toString()); @@ -905,4 +1057,446 @@ private static BinaryData addModelIdJson(BinaryData inputJson, String modelId) t return inputJson; } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }
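+     *
+     * A hedged sketch of consuming this asynchronous protocol method. In application code the public
+     * {@code OpenAIAsyncClient} is used instead of this implementation type, and {@code multipartBody} stands in
+     * for the multipart/form-data payload produced by {@code MultipartDataHelper} (names are illustrative):
+     *
+     * {@code
+     * client.getAudioTranscriptionAsResponseObjectWithResponseAsync("whisper-1", multipartBody, requestOptions)
+     *     .map(response -> response.getValue().toObject(AudioTranscription.class))
+     *     .subscribe(transcription -> System.out.println(transcription.getText()));
+     * }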
+ * + * @param modelId Specifies the model name to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response} on + * successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranscriptionAsResponseObjectWithResponseAsync( + String modelId, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return FluxUtil.withContext( + context -> + service.getAudioTranscriptionAsResponseObject( + OPEN_AI_ENDPOINT, + accept, + audioTranscriptionOptions, + requestOptions, + context)); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }
+ * + * @param modelId Specifies the model name to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranscriptionAsResponseObjectWithResponse( + String modelId, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return service.getAudioTranscriptionAsResponseObjectSync( + OPEN_AI_ENDPOINT, + accept, + audioTranscriptionOptions, + requestOptions, + Context.NONE); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * String
+     * }
+ * + * @param modelId Specifies the model name to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response} on + * successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranscriptionAsPlainTextWithResponseAsync( + String modelId, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return FluxUtil.withContext( + context -> + service.getAudioTranscriptionAsPlainText( + OPEN_AI_ENDPOINT, + accept, + audioTranscriptionOptions, + requestOptions, + context)); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * String
+     * }
+ * + * @param modelId Specifies the model name to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranscriptionAsPlainTextWithResponse( + String modelId, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return service.getAudioTranscriptionAsPlainTextSync( + OPEN_AI_ENDPOINT, + accept, + audioTranscriptionOptions, + requestOptions, + Context.NONE); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }
+ * + * @param deploymentOrModelName Specifies the model name to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response} on successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranslationAsResponseObjectWithResponseAsync( + String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return FluxUtil.withContext( + context -> + service.getAudioTranslationAsResponseObject( + OPEN_AI_ENDPOINT, + accept, + audioTranslationOptions, + requestOptions, + context)); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }
+ * + * @param modelId Specifies the model name to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranslationAsResponseObjectWithResponse( + String modelId, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return service.getAudioTranslationAsResponseObjectSync( + OPEN_AI_ENDPOINT, + accept, + audioTranslationOptions, + requestOptions, + Context.NONE); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * String
+     * }
+ * + * @param modelId Specifies the model name to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response} on successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranslationAsPlainTextWithResponseAsync( + String modelId, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return FluxUtil.withContext( + context -> + service.getAudioTranslationAsPlainText( + OPEN_AI_ENDPOINT, + accept, + audioTranslationOptions, + requestOptions, + context)); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * String
+     * }
+ * + * @param modelId Specifies the model name to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranslationAsPlainTextWithResponse( + String modelId, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return service.getAudioTranslationAsPlainTextSync( + OPEN_AI_ENDPOINT, + accept, + audioTranslationOptions, + requestOptions, + Context.NONE); + } } diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/OpenAIClientImpl.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/OpenAIClientImpl.java index d74f569bcc04..73beb5f1faa2 100644 --- a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/OpenAIClientImpl.java +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/OpenAIClientImpl.java @@ -360,6 +360,182 @@ Response beginAzureBatchImageGenerationSync( @BodyParam("application/json") BinaryData imageGenerationOptions, RequestOptions requestOptions, Context context); + + @Post("/deployments/{deploymentId}/audio/transcriptions") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Mono> getAudioTranscriptionAsPlainText( + @HostParam("endpoint") String endpoint, + @QueryParam("api-version") String apiVersion, + @PathParam("deploymentId") String deploymentOrModelName, + @HeaderParam("accept") String accept, + @BodyParam("application/json") BinaryData audioTranscriptionOptions, + RequestOptions requestOptions, + Context context); + + @Post("/deployments/{deploymentId}/audio/transcriptions") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Response getAudioTranscriptionAsPlainTextSync( + @HostParam("endpoint") String endpoint, + @QueryParam("api-version") String apiVersion, + @PathParam("deploymentId") String deploymentOrModelName, + @HeaderParam("accept") String accept, + @BodyParam("application/json") BinaryData audioTranscriptionOptions, + RequestOptions requestOptions, + Context context); + + // @Multipart not supported by RestProxy + @Post("/deployments/{deploymentId}/audio/transcriptions") + 
@ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Mono> getAudioTranscriptionAsResponseObject( + @HostParam("endpoint") String endpoint, + @QueryParam("api-version") String apiVersion, + @PathParam("deploymentId") String deploymentOrModelName, + @HeaderParam("content-type") String contentType, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranscriptionOptions, + RequestOptions requestOptions, + Context context); + + // @Multipart not supported by RestProxy + @Post("/deployments/{deploymentId}/audio/transcriptions") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Response getAudioTranscriptionAsResponseObjectSync( + @HostParam("endpoint") String endpoint, + @QueryParam("api-version") String apiVersion, + @PathParam("deploymentId") String deploymentOrModelName, + @HeaderParam("content-type") String contentType, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranscriptionOptions, + RequestOptions requestOptions, + Context context); + + @Post("/deployments/{deploymentId}/audio/translations") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Mono> getAudioTranslationAsPlainText( + @HostParam("endpoint") String endpoint, + @QueryParam("api-version") String apiVersion, + @PathParam("deploymentId") String deploymentOrModelName, + @HeaderParam("accept") String accept, + @BodyParam("application/json") BinaryData audioTranslationOptions, + RequestOptions requestOptions, + Context context); + + @Post("/deployments/{deploymentId}/audio/translations") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Response getAudioTranslationAsPlainTextSync( + @HostParam("endpoint") String endpoint, + @QueryParam("api-version") String apiVersion, + @PathParam("deploymentId") String deploymentOrModelName, + @HeaderParam("accept") String accept, + @BodyParam("application/json") BinaryData audioTranslationOptions, + RequestOptions requestOptions, + Context context); + + // @Multipart not supported by RestProxy + @Post("/deployments/{deploymentId}/audio/translations") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + 
@UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Mono> getAudioTranslationAsResponseObject( + @HostParam("endpoint") String endpoint, + @QueryParam("api-version") String apiVersion, + @PathParam("deploymentId") String deploymentOrModelName, + @HeaderParam("content-type") String contentType, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranslationOptions, + RequestOptions requestOptions, + Context context); + + // @Multipart not supported by RestProxy + @Post("/deployments/{deploymentId}/audio/translations") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Response getAudioTranslationAsResponseObjectSync( + @HostParam("endpoint") String endpoint, + @QueryParam("api-version") String apiVersion, + @PathParam("deploymentId") String deploymentOrModelName, + @HeaderParam("content-type") String contentType, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranslationOptions, + RequestOptions requestOptions, + Context context); } /** @@ -537,6 +713,18 @@ public Response getEmbeddingsWithResponse( * violence (Optional): (recursive schema, see violence above) * hate (Optional): (recursive schema, see hate above) * self_harm (Optional): (recursive schema, see self_harm above) + * error (Optional): { + * code: String (Required) + * message: String (Required) + * target: String (Optional) + * details (Optional): [ + * (recursive schema, see above) + * ] + * innererror (Optional): { + * code: String (Optional) + * innererror (Optional): (recursive schema, see innererror above) + * } + * } * } * } * ] @@ -650,6 +838,18 @@ public Mono> getCompletionsWithResponseAsync( * violence (Optional): (recursive schema, see violence above) * hate (Optional): (recursive schema, see hate above) * self_harm (Optional): (recursive schema, see self_harm above) + * error (Optional): { + * code: String (Required) + * message: String (Required) + * target: String (Optional) + * details (Optional): [ + * (recursive schema, see above) + * ] + * innererror (Optional): { + * code: String (Optional) + * innererror (Optional): (recursive schema, see innererror above) + * } + * } * } * } * ] @@ -800,6 +1000,18 @@ public Response getCompletionsWithResponse( * violence (Optional): (recursive schema, see violence above) * hate (Optional): (recursive schema, see hate above) * self_harm (Optional): (recursive schema, see self_harm above) + * error (Optional): { + * code: String (Required) + * message: String (Required) + * target: String (Optional) + * details (Optional): [ + * (recursive schema, see above) + * ] + * innererror (Optional): { + * code: String (Optional) + * innererror (Optional): (recursive schema, see innererror above) + * } + * } * } * } * ] @@ -935,6 +1147,18 @@ public Mono> getChatCompletionsWithResponseAsync( * violence (Optional): (recursive schema, see violence above) * hate (Optional): (recursive schema, see hate above) * self_harm (Optional): (recursive schema, see self_harm above) + * error 
(Optional): { + * code: String (Required) + * message: String (Required) + * target: String (Optional) + * details (Optional): [ + * (recursive schema, see above) + * ] + * innererror (Optional): { + * code: String (Optional) + * innererror (Optional): (recursive schema, see innererror above) + * } + * } * } * } * ] @@ -1068,6 +1292,18 @@ public Response getChatCompletionsWithResponse( * violence (Optional): (recursive schema, see violence above) * hate (Optional): (recursive schema, see hate above) * self_harm (Optional): (recursive schema, see self_harm above) + * error (Optional): { + * code: String (Required) + * message: String (Required) + * target: String (Optional) + * details (Optional): [ + * (recursive schema, see above) + * ] + * innererror (Optional): { + * code: String (Optional) + * innererror (Optional): (recursive schema, see innererror above) + * } + * } * } * } * ] @@ -1204,6 +1440,18 @@ public Mono> getChatCompletionsWithAzureExtensionsWithRespo * violence (Optional): (recursive schema, see violence above) * hate (Optional): (recursive schema, see hate above) * self_harm (Optional): (recursive schema, see self_harm above) + * error (Optional): { + * code: String (Required) + * message: String (Required) + * target: String (Optional) + * details (Optional): [ + * (recursive schema, see above) + * ] + * innererror (Optional): { + * code: String (Optional) + * innererror (Optional): (recursive schema, see innererror above) + * } + * } * } * } * ] @@ -1509,4 +1757,478 @@ public SyncPoller beginBeginAzureBatchImageGeneration( TypeReference.createInstance(BinaryData.class), TypeReference.createInstance(BinaryData.class)); } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

<p><strong>Request Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }</pre>
+     *
+     * <p><strong>Response Body Schema</strong>
+     *
+     * <pre>{@code
+     * String
+     * }</pre>
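+     *
+     * <p>Illustrative sketch only (not generated code): the options model defined in this change could be populated
+     * as follows before being handed to a convenience overload. The file path and values are placeholders, and the
+     * multipart/form-data encoding of the request body is assumed to be handled elsewhere and is not shown.
+     * <pre>{@code
+     * byte[] audio = BinaryData.fromFile(Paths.get("audio.wav")).toBytes(); // placeholder path, any supported format
+     * AudioTranscriptionOptions options = new AudioTranscriptionOptions(audio)
+     *     .setResponseFormat(AudioTranscriptionFormat.TEXT) // plain-text output, matching this method's response
+     *     .setLanguage("en")                                // optional ISO-639-1 hint for the spoken language
+     *     .setTemperature(0.0);                             // optional sampling temperature
+     * }</pre>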
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response} on + * successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranscriptionAsPlainTextWithResponseAsync( + String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return FluxUtil.withContext( + context -> + service.getAudioTranscriptionAsPlainText( + this.getEndpoint(), + this.getServiceVersion().getVersion(), + deploymentOrModelName, + accept, + audioTranscriptionOptions, + requestOptions, + context)); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

<p><strong>Request Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }</pre>
+     *
+     * <p><strong>Response Body Schema</strong>
+     *
+     * <pre>{@code
+     * String
+     * }</pre>
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranscriptionAsPlainTextWithResponse( + String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return service.getAudioTranscriptionAsPlainTextSync( + this.getEndpoint(), + this.getServiceVersion().getVersion(), + deploymentOrModelName, + accept, + audioTranscriptionOptions, + requestOptions, + Context.NONE); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

<p><strong>Request Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }</pre>
+     *
+     * <p><strong>Response Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }</pre>
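+     *
+     * <p>Illustrative sketch only (not generated code): assuming the verbose JSON response format, the returned
+     * {@code BinaryData} could be deserialized into the {@code AudioTranscription} model defined in this change and
+     * its segments inspected. The {@code response} variable is a placeholder for a {@code Response<BinaryData>}
+     * obtained from this method.
+     * <pre>{@code
+     * AudioTranscription transcription = response.getValue().toObject(AudioTranscription.class);
+     * System.out.println("Text: " + transcription.getText());
+     * System.out.println("Detected language: " + transcription.getLanguage());
+     * if (AudioTaskLabel.TRANSCRIBE.equals(transcription.getTask()) && transcription.getSegments() != null) {
+     *     for (AudioTranscriptionSegment segment : transcription.getSegments()) {
+     *         // start and end are surfaced as java.time.Duration values
+     *         System.out.printf("[%s - %s] %s%n", segment.getStart(), segment.getEnd(), segment.getText());
+     *     }
+     * }
+     * }</pre>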
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response} on + * successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranscriptionAsResponseObjectWithResponseAsync( + String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + final String contentType = "multipart/form-data"; + final String accept = "application/json"; + return FluxUtil.withContext( + context -> + service.getAudioTranscriptionAsResponseObject( + this.getEndpoint(), + this.getServiceVersion().getVersion(), + deploymentOrModelName, + contentType, + accept, + audioTranscriptionOptions, + requestOptions, + context)); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

<p><strong>Request Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }</pre>
+     *
+     * <p><strong>Response Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }</pre>
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranscriptionAsResponseObjectWithResponse( + String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + final String contentType = "multipart/form-data"; + final String accept = "application/json"; + return service.getAudioTranscriptionAsResponseObjectSync( + this.getEndpoint(), + this.getServiceVersion().getVersion(), + deploymentOrModelName, + contentType, + accept, + audioTranscriptionOptions, + requestOptions, + Context.NONE); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

<p><strong>Request Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }</pre>
+     *
+     * <p><strong>Response Body Schema</strong>
+     *
+     * <pre>{@code
+     * String
+     * }</pre>
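+     *
+     * <p>Illustrative sketch only (not generated code): populating the translation options model defined in this
+     * change. Values are placeholders; the multipart/form-data encoding of the request body is not shown.
+     * <pre>{@code
+     * byte[] audio = BinaryData.fromFile(Paths.get("spoken-audio.wav")).toBytes(); // placeholder path
+     * AudioTranslationOptions options = new AudioTranslationOptions(audio)
+     *     .setResponseFormat(AudioTranscriptionFormat.TEXT) // plain-text output, matching this method's response
+     *     .setPrompt("A short hint written in the audio's spoken language."); // optional style/continuation hint
+     * }</pre>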
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response} on successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranslationAsPlainTextWithResponseAsync( + String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return FluxUtil.withContext( + context -> + service.getAudioTranslationAsPlainText( + this.getEndpoint(), + this.getServiceVersion().getVersion(), + deploymentOrModelName, + accept, + audioTranslationOptions, + requestOptions, + context)); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

<p><strong>Request Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }</pre>
+     *
+     * <p><strong>Response Body Schema</strong>
+     *
+     * <pre>{@code
+     * String
+     * }</pre>
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranslationAsPlainTextWithResponse( + String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return service.getAudioTranslationAsPlainTextSync( + this.getEndpoint(), + this.getServiceVersion().getVersion(), + deploymentOrModelName, + accept, + audioTranslationOptions, + requestOptions, + Context.NONE); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

<p><strong>Request Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }</pre>
+     *
+     * <p><strong>Response Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }</pre>
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response} on successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranslationAsResponseObjectWithResponseAsync( + String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + final String contentType = "multipart/form-data"; + final String accept = "application/json"; + return FluxUtil.withContext( + context -> + service.getAudioTranslationAsResponseObject( + this.getEndpoint(), + this.getServiceVersion().getVersion(), + deploymentOrModelName, + contentType, + accept, + audioTranslationOptions, + requestOptions, + context)); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

<p><strong>Request Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }</pre>
+     *
+     * <p><strong>Response Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }</pre>
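+     *
+     * <p>Illustrative sketch only (not generated code): the wire format reports timings as fractional seconds, while
+     * the models defined in this change surface them as {@code java.time.Duration}. Here {@code translation} is a
+     * placeholder for a deserialized {@code AudioTranscription} result.
+     * <pre>{@code
+     * Duration total = translation.getDuration();               // e.g. 2.5 seconds on the wire becomes PT2.5S
+     * if (translation.getSegments() != null) {                  // segments are only present for verbose output
+     *     for (AudioTranscriptionSegment segment : translation.getSegments()) {
+     *         long startMillis = segment.getStart().toMillis(); // fractional seconds, converted via nanoseconds
+     *         int seekSeconds = segment.getSeek() / 100;        // seek positions are hundredths of seconds
+     *     }
+     * }
+     * }</pre>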
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranslationAsResponseObjectWithResponse( + String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + final String contentType = "multipart/form-data"; + final String accept = "application/json"; + return service.getAudioTranslationAsResponseObjectSync( + this.getEndpoint(), + this.getServiceVersion().getVersion(), + deploymentOrModelName, + contentType, + accept, + audioTranslationOptions, + requestOptions, + Context.NONE); + } } diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTaskLabel.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTaskLabel.java new file mode 100644 index 000000000000..36f8361ad2a4 --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTaskLabel.java @@ -0,0 +1,50 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// Code generated by Microsoft (R) AutoRest Code Generator. +package com.azure.ai.openai.models; + +import com.azure.core.annotation.Generated; +import com.azure.core.util.ExpandableStringEnum; +import com.fasterxml.jackson.annotation.JsonCreator; +import java.util.Collection; + +/** Defines the possible descriptors for available audio operation responses. */ +public final class AudioTaskLabel extends ExpandableStringEnum { + + /** Accompanying response data resulted from an audio transcription task. */ + @Generated public static final AudioTaskLabel TRANSCRIBE = fromString("transcribe"); + + /** Accompanying response data resulted from an audio translation task. */ + @Generated public static final AudioTaskLabel TRANSLATE = fromString("translate"); + + /** + * Creates a new instance of AudioTaskLabel value. + * + * @deprecated Use the {@link #fromString(String)} factory method. + */ + @Generated + @Deprecated + public AudioTaskLabel() {} + + /** + * Creates or finds a AudioTaskLabel from its string representation. + * + * @param name a name to look for. + * @return the corresponding AudioTaskLabel. + */ + @Generated + @JsonCreator + public static AudioTaskLabel fromString(String name) { + return fromString(name, AudioTaskLabel.class); + } + + /** + * Gets known AudioTaskLabel values. + * + * @return known AudioTaskLabel values. 
+ */ + @Generated + public static Collection values() { + return values(AudioTaskLabel.class); + } +} diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscription.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscription.java new file mode 100644 index 000000000000..8d7b085ce8af --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscription.java @@ -0,0 +1,119 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// Code generated by Microsoft (R) AutoRest Code Generator. +package com.azure.ai.openai.models; + +import com.azure.core.annotation.Generated; +import com.azure.core.annotation.Immutable; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.time.Duration; +import java.util.List; + +/** Result information for an operation that transcribed spoken audio into written text. */ +@Immutable +public final class AudioTranscription { + + /* + * The transcribed text for the provided audio data. + */ + @Generated + @JsonProperty(value = "text") + private String text; + + /* + * The label that describes which operation type generated the accompanying response data. + */ + @Generated + @JsonProperty(value = "task") + private AudioTaskLabel task; + + /* + * The spoken language that was detected in the transcribed audio data. + * This is expressed as a two-letter ISO-639-1 language code like 'en' or 'fr'. + */ + @Generated + @JsonProperty(value = "language") + private String language; + + /* + * The total duration of the audio processed to produce accompanying transcription information. + */ + @Generated + @JsonProperty(value = "duration") + private Double duration; + + /* + * A collection of information about the timing, probabilities, and other detail of each processed audio segment. + */ + @Generated + @JsonProperty(value = "segments") + private List segments; + + /** + * Creates an instance of AudioTranscription class. + * + * @param text the text value to set. + */ + @Generated + @JsonCreator + private AudioTranscription(@JsonProperty(value = "text") String text) { + this.text = text; + } + + /** + * Get the text property: The transcribed text for the provided audio data. + * + * @return the text value. + */ + @Generated + public String getText() { + return this.text; + } + + /** + * Get the task property: The label that describes which operation type generated the accompanying response data. + * + * @return the task value. + */ + @Generated + public AudioTaskLabel getTask() { + return this.task; + } + + /** + * Get the language property: The spoken language that was detected in the transcribed audio data. This is expressed + * as a two-letter ISO-639-1 language code like 'en' or 'fr'. + * + * @return the language value. + */ + @Generated + public String getLanguage() { + return this.language; + } + + /** + * Get the duration property: The total duration of the audio processed to produce accompanying transcription + * information. + * + * @return the duration value. + */ + @Generated + public Duration getDuration() { + if (this.duration == null) { + return null; + } + return Duration.ofNanos((long) (this.duration * 1000_000_000L)); + } + + /** + * Get the segments property: A collection of information about the timing, probabilities, and other detail of each + * processed audio segment. + * + * @return the segments value. 
+ */ + @Generated + public List getSegments() { + return this.segments; + } +} diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionFormat.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionFormat.java new file mode 100644 index 000000000000..8429c748e7ca --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionFormat.java @@ -0,0 +1,65 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// Code generated by Microsoft (R) AutoRest Code Generator. +package com.azure.ai.openai.models; + +import com.azure.core.annotation.Generated; +import com.azure.core.util.ExpandableStringEnum; +import com.fasterxml.jackson.annotation.JsonCreator; +import java.util.Collection; + +/** Defines available options for the underlying response format of output transcription information. */ +public final class AudioTranscriptionFormat extends ExpandableStringEnum { + + /** Use a response body that is a JSON object containing a single 'text' field for the transcription. */ + @Generated public static final AudioTranscriptionFormat JSON = fromString("json"); + + /** + * Use a response body that is a JSON object containing transcription text along with timing, segments, and other + * metadata. + */ + @Generated public static final AudioTranscriptionFormat VERBOSE_JSON = fromString("verbose_json"); + + /** Use a response body that is plain text containing the raw, unannotated transcription. */ + @Generated public static final AudioTranscriptionFormat TEXT = fromString("text"); + + /** Use a response body that is plain text in SubRip (SRT) format that also includes timing information. */ + @Generated public static final AudioTranscriptionFormat SRT = fromString("srt"); + + /** + * Use a response body that is plain text in Web Video Text Tracks (VTT) format that also includes timing + * information. + */ + @Generated public static final AudioTranscriptionFormat VTT = fromString("vtt"); + + /** + * Creates a new instance of AudioTranscriptionFormat value. + * + * @deprecated Use the {@link #fromString(String)} factory method. + */ + @Generated + @Deprecated + public AudioTranscriptionFormat() {} + + /** + * Creates or finds a AudioTranscriptionFormat from its string representation. + * + * @param name a name to look for. + * @return the corresponding AudioTranscriptionFormat. + */ + @Generated + @JsonCreator + public static AudioTranscriptionFormat fromString(String name) { + return fromString(name, AudioTranscriptionFormat.class); + } + + /** + * Gets known AudioTranscriptionFormat values. + * + * @return known AudioTranscriptionFormat values. + */ + @Generated + public static Collection values() { + return values(AudioTranscriptionFormat.class); + } +} diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionOptions.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionOptions.java new file mode 100644 index 000000000000..7d72fd5ea891 --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionOptions.java @@ -0,0 +1,211 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// Code generated by Microsoft (R) AutoRest Code Generator. 
+package com.azure.ai.openai.models; + +import com.azure.core.annotation.Fluent; +import com.azure.core.annotation.Generated; +import com.azure.core.util.CoreUtils; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +/** The configuration information for an audio transcription request. */ +@Fluent +public final class AudioTranscriptionOptions { + + /* + * The audio data to transcribe. This must be the binary content of a file in one of the supported media formats: + * flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm. + */ + @Generated + @JsonProperty(value = "file") + private byte[] file; + + /* + * The requested format of the transcription response data, which will influence the content and detail of the + * result. + */ + @Generated + @JsonProperty(value = "response_format") + private AudioTranscriptionFormat responseFormat; + + /* + * The primary spoken language of the audio data to be transcribed, supplied as a two-letter ISO-639-1 language + * code + * such as 'en' or 'fr'. + * Providing this known input language is optional but may improve the accuracy and/or latency of transcription. + */ + @Generated + @JsonProperty(value = "language") + private String language; + + /* + * An optional hint to guide the model's style or continue from a prior audio segment. The written language of the + * prompt should match the primary spoken language of the audio data. + */ + @Generated + @JsonProperty(value = "prompt") + private String prompt; + + /* + * The sampling temperature, between 0 and 1. + * Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused + * and deterministic. + * If set to 0, the model will use log probability to automatically increase the temperature until certain + * thresholds are hit. + */ + @Generated + @JsonProperty(value = "temperature") + private Double temperature; + + /* + * The model to use for this transcription request. + */ + @Generated + @JsonProperty(value = "model") + private String model; + + /** + * Creates an instance of AudioTranscriptionOptions class. + * + * @param file the file value to set. + */ + @Generated + @JsonCreator + public AudioTranscriptionOptions(@JsonProperty(value = "file") byte[] file) { + this.file = file; + } + + /** + * Get the file property: The audio data to transcribe. This must be the binary content of a file in one of the + * supported media formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm. + * + * @return the file value. + */ + @Generated + public byte[] getFile() { + return CoreUtils.clone(this.file); + } + + /** + * Get the responseFormat property: The requested format of the transcription response data, which will influence + * the content and detail of the result. + * + * @return the responseFormat value. + */ + @Generated + public AudioTranscriptionFormat getResponseFormat() { + return this.responseFormat; + } + + /** + * Set the responseFormat property: The requested format of the transcription response data, which will influence + * the content and detail of the result. + * + * @param responseFormat the responseFormat value to set. + * @return the AudioTranscriptionOptions object itself. 
+ */ + @Generated + public AudioTranscriptionOptions setResponseFormat(AudioTranscriptionFormat responseFormat) { + this.responseFormat = responseFormat; + return this; + } + + /** + * Get the language property: The primary spoken language of the audio data to be transcribed, supplied as a + * two-letter ISO-639-1 language code such as 'en' or 'fr'. Providing this known input language is optional but may + * improve the accuracy and/or latency of transcription. + * + * @return the language value. + */ + @Generated + public String getLanguage() { + return this.language; + } + + /** + * Set the language property: The primary spoken language of the audio data to be transcribed, supplied as a + * two-letter ISO-639-1 language code such as 'en' or 'fr'. Providing this known input language is optional but may + * improve the accuracy and/or latency of transcription. + * + * @param language the language value to set. + * @return the AudioTranscriptionOptions object itself. + */ + @Generated + public AudioTranscriptionOptions setLanguage(String language) { + this.language = language; + return this; + } + + /** + * Get the prompt property: An optional hint to guide the model's style or continue from a prior audio segment. The + * written language of the prompt should match the primary spoken language of the audio data. + * + * @return the prompt value. + */ + @Generated + public String getPrompt() { + return this.prompt; + } + + /** + * Set the prompt property: An optional hint to guide the model's style or continue from a prior audio segment. The + * written language of the prompt should match the primary spoken language of the audio data. + * + * @param prompt the prompt value to set. + * @return the AudioTranscriptionOptions object itself. + */ + @Generated + public AudioTranscriptionOptions setPrompt(String prompt) { + this.prompt = prompt; + return this; + } + + /** + * Get the temperature property: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + * output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the + * model will use log probability to automatically increase the temperature until certain thresholds are hit. + * + * @return the temperature value. + */ + @Generated + public Double getTemperature() { + return this.temperature; + } + + /** + * Set the temperature property: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + * output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the + * model will use log probability to automatically increase the temperature until certain thresholds are hit. + * + * @param temperature the temperature value to set. + * @return the AudioTranscriptionOptions object itself. + */ + @Generated + public AudioTranscriptionOptions setTemperature(Double temperature) { + this.temperature = temperature; + return this; + } + + /** + * Get the model property: The model to use for this transcription request. + * + * @return the model value. + */ + @Generated + public String getModel() { + return this.model; + } + + /** + * Set the model property: The model to use for this transcription request. + * + * @param model the model value to set. + * @return the AudioTranscriptionOptions object itself. 
+ */ + @Generated + public AudioTranscriptionOptions setModel(String model) { + this.model = model; + return this; + } +} diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionSegment.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionSegment.java new file mode 100644 index 000000000000..87e289da3b0e --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionSegment.java @@ -0,0 +1,262 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// Code generated by Microsoft (R) AutoRest Code Generator. +package com.azure.ai.openai.models; + +import com.azure.core.annotation.Generated; +import com.azure.core.annotation.Immutable; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.time.Duration; +import java.util.List; + +/** + * Extended information about a single segment of transcribed audio data. Segments generally represent roughly 5-10 + * seconds of speech. Segment boundaries typically occur between words but not necessarily sentences. + */ +@Immutable +public final class AudioTranscriptionSegment { + + /* + * The 0-based index of this segment within a transcription. + */ + @Generated + @JsonProperty(value = "id") + private int id; + + /* + * The time at which this segment started relative to the beginning of the transcribed audio. + */ + @Generated + @JsonProperty(value = "start") + private double start; + + /* + * The time at which this segment ended relative to the beginning of the transcribed audio. + */ + @Generated + @JsonProperty(value = "end") + private double end; + + /* + * The transcribed text that was part of this audio segment. + */ + @Generated + @JsonProperty(value = "text") + private String text; + + /* + * The temperature score associated with this audio segment. + */ + @Generated + @JsonProperty(value = "temperature") + private double temperature; + + /* + * The average log probability associated with this audio segment. + */ + @Generated + @JsonProperty(value = "avg_logprob") + private double avgLogprob; + + /* + * The compression ratio of this audio segment. + */ + @Generated + @JsonProperty(value = "compression_ratio") + private double compressionRatio; + + /* + * The probability of no speech detection within this audio segment. + */ + @Generated + @JsonProperty(value = "no_speech_prob") + private double noSpeechProb; + + /* + * The token IDs matching the transcribed text in this audio segment. + */ + @Generated + @JsonProperty(value = "tokens") + private List tokens; + + /* + * The seek position associated with the processing of this audio segment. + * Seek positions are expressed as hundredths of seconds. + * The model may process several segments from a single seek position, so while the seek position will never + * represent + * a later time than the segment's start, the segment's start may represent a significantly later time than the + * segment's associated seek position. + */ + @Generated + @JsonProperty(value = "seek") + private int seek; + + /** + * Creates an instance of AudioTranscriptionSegment class. + * + * @param id the id value to set. + * @param start the start value to set. + * @param end the end value to set. + * @param text the text value to set. + * @param temperature the temperature value to set. + * @param avgLogprob the avgLogprob value to set. 
+ * @param compressionRatio the compressionRatio value to set. + * @param noSpeechProb the noSpeechProb value to set. + * @param tokens the tokens value to set. + * @param seek the seek value to set. + */ + @Generated + private AudioTranscriptionSegment( + int id, + Duration start, + Duration end, + String text, + double temperature, + double avgLogprob, + double compressionRatio, + double noSpeechProb, + List tokens, + int seek) { + this.id = id; + this.start = (double) start.toNanos() / 1000_000_000L; + this.end = (double) end.toNanos() / 1000_000_000L; + this.text = text; + this.temperature = temperature; + this.avgLogprob = avgLogprob; + this.compressionRatio = compressionRatio; + this.noSpeechProb = noSpeechProb; + this.tokens = tokens; + this.seek = seek; + } + + @Generated + @JsonCreator + private AudioTranscriptionSegment( + @JsonProperty(value = "id") int id, + @JsonProperty(value = "start") double start, + @JsonProperty(value = "end") double end, + @JsonProperty(value = "text") String text, + @JsonProperty(value = "temperature") double temperature, + @JsonProperty(value = "avg_logprob") double avgLogprob, + @JsonProperty(value = "compression_ratio") double compressionRatio, + @JsonProperty(value = "no_speech_prob") double noSpeechProb, + @JsonProperty(value = "tokens") List tokens, + @JsonProperty(value = "seek") int seek) { + this( + id, + Duration.ofNanos((long) (start * 1000_000_000L)), + Duration.ofNanos((long) (end * 1000_000_000L)), + text, + temperature, + avgLogprob, + compressionRatio, + noSpeechProb, + tokens, + seek); + } + + /** + * Get the id property: The 0-based index of this segment within a transcription. + * + * @return the id value. + */ + @Generated + public int getId() { + return this.id; + } + + /** + * Get the start property: The time at which this segment started relative to the beginning of the transcribed + * audio. + * + * @return the start value. + */ + @Generated + public Duration getStart() { + return Duration.ofNanos((long) (this.start * 1000_000_000L)); + } + + /** + * Get the end property: The time at which this segment ended relative to the beginning of the transcribed audio. + * + * @return the end value. + */ + @Generated + public Duration getEnd() { + return Duration.ofNanos((long) (this.end * 1000_000_000L)); + } + + /** + * Get the text property: The transcribed text that was part of this audio segment. + * + * @return the text value. + */ + @Generated + public String getText() { + return this.text; + } + + /** + * Get the temperature property: The temperature score associated with this audio segment. + * + * @return the temperature value. + */ + @Generated + public double getTemperature() { + return this.temperature; + } + + /** + * Get the avgLogprob property: The average log probability associated with this audio segment. + * + * @return the avgLogprob value. + */ + @Generated + public double getAvgLogprob() { + return this.avgLogprob; + } + + /** + * Get the compressionRatio property: The compression ratio of this audio segment. + * + * @return the compressionRatio value. + */ + @Generated + public double getCompressionRatio() { + return this.compressionRatio; + } + + /** + * Get the noSpeechProb property: The probability of no speech detection within this audio segment. + * + * @return the noSpeechProb value. + */ + @Generated + public double getNoSpeechProb() { + return this.noSpeechProb; + } + + /** + * Get the tokens property: The token IDs matching the transcribed text in this audio segment. + * + * @return the tokens value. 
+ */ + @Generated + public List getTokens() { + return this.tokens; + } + + /** + * Get the seek property: The seek position associated with the processing of this audio segment. Seek positions are + * expressed as hundredths of seconds. The model may process several segments from a single seek position, so while + * the seek position will never represent a later time than the segment's start, the segment's start may represent a + * significantly later time than the segment's associated seek position. + * + * @return the seek value. + */ + @Generated + public int getSeek() { + return this.seek; + } +} diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationOptions.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationOptions.java new file mode 100644 index 000000000000..65f7b1f873ad --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationOptions.java @@ -0,0 +1,175 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// Code generated by Microsoft (R) AutoRest Code Generator. +package com.azure.ai.openai.models; + +import com.azure.core.annotation.Fluent; +import com.azure.core.annotation.Generated; +import com.azure.core.util.CoreUtils; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +/** The configuration information for an audio translation request. */ +@Fluent +public final class AudioTranslationOptions { + + /* + * The audio data to transcribe. This must be the binary content of a file in one of the supported media formats: + * flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm. + */ + @Generated + @JsonProperty(value = "file") + private byte[] file; + + /* + * The requested format of the transcription response data, which will influence the content and detail of the + * result. + */ + @Generated + @JsonProperty(value = "response_format") + private AudioTranscriptionFormat responseFormat; + + /* + * An optional hint to guide the model's style or continue from a prior audio segment. The written language of the + * prompt should match the primary spoken language of the audio data. + */ + @Generated + @JsonProperty(value = "prompt") + private String prompt; + + /* + * The sampling temperature, between 0 and 1. + * Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused + * and deterministic. + * If set to 0, the model will use log probability to automatically increase the temperature until certain + * thresholds are hit. + */ + @Generated + @JsonProperty(value = "temperature") + private Double temperature; + + /* + * The model to use for this transcription request. + */ + @Generated + @JsonProperty(value = "model") + private String model; + + /** + * Creates an instance of AudioTranslationOptions class. + * + * @param file the file value to set. + */ + @Generated + @JsonCreator + public AudioTranslationOptions(@JsonProperty(value = "file") byte[] file) { + this.file = file; + } + + /** + * Get the file property: The audio data to transcribe. This must be the binary content of a file in one of the + * supported media formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm. + * + * @return the file value. 
+ */ + @Generated + public byte[] getFile() { + return CoreUtils.clone(this.file); + } + + /** + * Get the responseFormat property: The requested format of the transcription response data, which will influence + * the content and detail of the result. + * + * @return the responseFormat value. + */ + @Generated + public AudioTranscriptionFormat getResponseFormat() { + return this.responseFormat; + } + + /** + * Set the responseFormat property: The requested format of the transcription response data, which will influence + * the content and detail of the result. + * + * @param responseFormat the responseFormat value to set. + * @return the AudioTranslationOptions object itself. + */ + @Generated + public AudioTranslationOptions setResponseFormat(AudioTranscriptionFormat responseFormat) { + this.responseFormat = responseFormat; + return this; + } + + /** + * Get the prompt property: An optional hint to guide the model's style or continue from a prior audio segment. The + * written language of the prompt should match the primary spoken language of the audio data. + * + * @return the prompt value. + */ + @Generated + public String getPrompt() { + return this.prompt; + } + + /** + * Set the prompt property: An optional hint to guide the model's style or continue from a prior audio segment. The + * written language of the prompt should match the primary spoken language of the audio data. + * + * @param prompt the prompt value to set. + * @return the AudioTranslationOptions object itself. + */ + @Generated + public AudioTranslationOptions setPrompt(String prompt) { + this.prompt = prompt; + return this; + } + + /** + * Get the temperature property: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + * output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the + * model will use log probability to automatically increase the temperature until certain thresholds are hit. + * + * @return the temperature value. + */ + @Generated + public Double getTemperature() { + return this.temperature; + } + + /** + * Set the temperature property: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + * output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the + * model will use log probability to automatically increase the temperature until certain thresholds are hit. + * + * @param temperature the temperature value to set. + * @return the AudioTranslationOptions object itself. + */ + @Generated + public AudioTranslationOptions setTemperature(Double temperature) { + this.temperature = temperature; + return this; + } + + /** + * Get the model property: The model to use for this transcription request. + * + * @return the model value. + */ + @Generated + public String getModel() { + return this.model; + } + + /** + * Set the model property: The model to use for this transcription request. + * + * @param model the model value to set. + * @return the AudioTranslationOptions object itself. 
+ */ + @Generated + public AudioTranslationOptions setModel(String model) { + this.model = model; + return this; + } +} diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/ContentFilterResults.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/ContentFilterResults.java index 65883af4465f..2c1c3c668bd3 100644 --- a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/ContentFilterResults.java +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/ContentFilterResults.java @@ -5,6 +5,7 @@ import com.azure.core.annotation.Generated; import com.azure.core.annotation.Immutable; +import com.azure.core.models.ResponseError; import com.fasterxml.jackson.annotation.JsonProperty; /** Information about the content filtering category, if it has been detected. */ @@ -98,4 +99,23 @@ public ContentFilterResult getSelfHarm() { /** Creates an instance of ContentFilterResults class. */ @Generated private ContentFilterResults() {} + + /* + * Describes an error returned if the content filtering system is + * down or otherwise unable to complete the operation in time. + */ + @Generated + @JsonProperty(value = "error") + private ResponseError error; + + /** + * Get the error property: Describes an error returned if the content filtering system is down or otherwise unable + * to complete the operation in time. + * + * @return the error value. + */ + @Generated + public ResponseError getError() { + return this.error; + } } diff --git a/sdk/openai/azure-ai-openai/src/main/java/module-info.java b/sdk/openai/azure-ai-openai/src/main/java/module-info.java index 016c2a1fc8be..c8eafa553ff1 100644 --- a/sdk/openai/azure-ai-openai/src/main/java/module-info.java +++ b/sdk/openai/azure-ai-openai/src/main/java/module-info.java @@ -7,7 +7,6 @@ exports com.azure.ai.openai; exports com.azure.ai.openai.models; - exports com.azure.ai.openai.implementation.models; opens com.azure.ai.openai.models to com.azure.core, diff --git a/sdk/openai/azure-ai-openai/src/samples/README.md b/sdk/openai/azure-ai-openai/src/samples/README.md index cf37cf05b527..fa5a898c27eb 100644 --- a/sdk/openai/azure-ai-openai/src/samples/README.md +++ b/sdk/openai/azure-ai-openai/src/samples/README.md @@ -28,12 +28,16 @@ Synchronous: - [Chat Completions][sample_get_chat_completions] - [Embeddings][sample_get_embedding] - [Image Generation][sample_image_generation] +- [Audio Transcription][sample_audio_transcription] +- [Audio Translation][sample_audio_translation] Asynchronous: - [Text Completions][async_sample_get_completions] - [Chat Completions][async_sample_get_chat_completions] - [Embeddings][async_sample_get_embedding] - [Image Generation][async_sample_image_generation] +- [Audio Transcription][async_sample_audio_transcription] +- [Audio Translation][async_sample_audio_translation] Cookbook: - [Chat bot][cookbook_chat_bot] @@ -66,11 +70,15 @@ This project welcomes contributions and suggestions. 
Find [more contributing][SD [async_sample_get_chat_completions]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetChatCompletionsAsyncSample.java [async_sample_get_embedding]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetEmbeddingsAsyncSample.java [async_sample_image_generation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetImagesAsyncSample.java +[async_sample_audio_transcription]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionAsyncSample.java +[async_sample_audio_translation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationAsyncSample.java [sample_get_completions]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetCompletionsSample.java [sample_get_chat_completions]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetChatCompletionsSample.java [sample_get_embedding]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetEmbeddingsSample.java [sample_image_generation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetImagesSample.java +[sample_audio_transcription]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java +[sample_audio_translation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java [cookbook_chat_bot]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatbotSample.java [cookbook_chat_bot_with_key]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatbotWithKeySample.java diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatCompletionsWithYourData.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatCompletionsWithYourData.java index 0d732704c90c..fecaa9dccf77 100644 --- a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatCompletionsWithYourData.java +++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatCompletionsWithYourData.java @@ -29,7 +29,7 @@ public class ChatCompletionsWithYourData { * * @param args Unused. Arguments to the program. 
*/ - public static void main(String[] args){ + public static void main(String[] args) { String azureOpenaiKey = "{azure-open-ai-key}"; String endpoint = "{azure-open-ai-endpoint}"; String deploymentOrModelId = "{azure-open-ai-deployment-model-id}"; diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/impl/ReadmeSamples.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/impl/ReadmeSamples.java index 3384e3cb3e2f..7488e04c3271 100644 --- a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/impl/ReadmeSamples.java +++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/impl/ReadmeSamples.java @@ -6,6 +6,10 @@ import com.azure.ai.openai.OpenAIAsyncClient; import com.azure.ai.openai.OpenAIClient; import com.azure.ai.openai.OpenAIClientBuilder; +import com.azure.ai.openai.models.AudioTranscription; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.ai.openai.models.AudioTranslationOptions; import com.azure.ai.openai.models.ChatChoice; import com.azure.ai.openai.models.ChatCompletions; import com.azure.ai.openai.models.ChatCompletionsOptions; @@ -25,11 +29,14 @@ import com.azure.core.credential.TokenCredential; import com.azure.core.http.ProxyOptions; import com.azure.core.models.ResponseError; +import com.azure.core.util.BinaryData; import com.azure.core.util.HttpClientOptions; import com.azure.core.util.IterableStream; import com.azure.identity.DefaultAzureCredentialBuilder; import java.net.InetSocketAddress; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -221,4 +228,34 @@ public void imageGeneration() { } // END: readme-sample-imageGeneration } + + public void audioTranscription() { + // BEGIN: readme-sample-audioTranscription + String fileName = "{your-file-name}"; + Path filePath = Paths.get("{your-file-path}" + fileName); + + byte[] file = BinaryData.fromFile(filePath).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription transcription = client.getAudioTranscription("{deploymentOrModelId}", fileName, transcriptionOptions); + + System.out.println("Transcription: " + transcription.getText()); + // END: readme-sample-audioTranscription + } + + public void audioTranslation() { + // BEGIN: readme-sample-audioTranslation + String fileName = "{your-file-name}"; + Path filePath = Paths.get("{your-file-path}" + fileName); + + byte[] file = BinaryData.fromFile(filePath).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription translation = client.getAudioTranslation("{deploymentOrModelId}", fileName, translationOptions); + + System.out.println("Translation: " + translation.getText()); + // END: readme-sample-audioTranslation + } } diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/JP_it_is_rainy_today.wav b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/JP_it_is_rainy_today.wav new file mode 100644 index 000000000000..5970c85ec1cd Binary files /dev/null and b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/JP_it_is_rainy_today.wav differ diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/batman.wav 
b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/batman.wav new file mode 100644 index 000000000000..4c0b7248a39c Binary files /dev/null and b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/batman.wav differ diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionAsyncSample.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionAsyncSample.java new file mode 100644 index 000000000000..fbebd49b5965 --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionAsyncSample.java @@ -0,0 +1,52 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.openai.usage; + +import com.azure.ai.openai.OpenAIAsyncClient; +import com.azure.ai.openai.OpenAIClientBuilder; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.core.credential.AzureKeyCredential; +import com.azure.core.util.BinaryData; + +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.concurrent.TimeUnit; + +/** + * An asynchronous sample demonstrates how to transcribe a given audio file. + */ +public class AudioTranscriptionAsyncSample { + /** + * Runs the sample algorithm and demonstrates how to transcribe a given audio file. + * + * @param args Unused. Arguments to the program. + */ + public static void main(String[] args) throws InterruptedException { + String azureOpenaiKey = "{azure-open-ai-key}"; + String endpoint = "{azure-open-ai-endpoint}"; + String deploymentOrModelId = "{azure-open-ai-deployment-model-id}"; + String fileName = "batman.wav"; + Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName); + + OpenAIAsyncClient client = new OpenAIClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(azureOpenaiKey)) + .buildAsyncClient(); + + byte[] file = BinaryData.fromFile(filePath).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + + client.getAudioTranscription(deploymentOrModelId, fileName, transcriptionOptions) + .subscribe(transcription -> { + System.out.println("Transcription: " + transcription.getText()); + }); + + // The .subscribe() creation and assignment is not a blocking call. For the purpose of this example, we sleep + // the thread so the program does not end before the transcription operation is complete. Using .block() instead of + // .subscribe() will turn this into a synchronous call. + TimeUnit.SECONDS.sleep(10); + } +} diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java new file mode 100644 index 000000000000..e16238116533 --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java @@ -0,0 +1,46 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License.
+ +package com.azure.ai.openai.usage; + +import com.azure.ai.openai.OpenAIClient; +import com.azure.ai.openai.OpenAIClientBuilder; +import com.azure.ai.openai.models.AudioTranscription; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.core.credential.AzureKeyCredential; +import com.azure.core.util.BinaryData; + +import java.nio.file.Path; +import java.nio.file.Paths; + +/** + * A sample demonstrates how to transcribe a given audio file. + */ +public class AudioTranscriptionSample { + /** + * Runs the sample algorithm and demonstrates how to transcribe a given audio file. + * + * @param args Unused. Arguments to the program. + */ + public static void main(String[] args) { + String azureOpenaiKey = "{azure-open-ai-key}"; + String endpoint = "{azure-open-ai-endpoint}"; + String deploymentOrModelId = "{azure-open-ai-deployment-model-id}"; + String fileName = "batman.wav"; + Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName); + + OpenAIClient client = new OpenAIClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(azureOpenaiKey)) + .buildClient(); + + byte[] file = BinaryData.fromFile(filePath).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription transcription = client.getAudioTranscription(deploymentOrModelId, fileName, transcriptionOptions); + + System.out.println("Transcription: " + transcription.getText()); + } +} diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationAsyncSample.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationAsyncSample.java new file mode 100644 index 000000000000..4ba19ad37b7f --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationAsyncSample.java @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.openai.usage; + +import com.azure.ai.openai.OpenAIAsyncClient; +import com.azure.ai.openai.OpenAIClientBuilder; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranslationOptions; +import com.azure.core.credential.AzureKeyCredential; +import com.azure.core.util.BinaryData; + +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.concurrent.TimeUnit; + +/** + * An asynchronous sample demonstrates how to translate a given audio file. + */ +public class AudioTranslationAsyncSample { + /** + * Runs the sample algorithm and demonstrates how to translate a given audio file. + * + * @param args Unused. Arguments to the program.
+ */ + public static void main(String[] args) throws InterruptedException { + String azureOpenaiKey = "{azure-open-ai-key}"; + String endpoint = "{azure-open-ai-endpoint}"; + String deploymentOrModelId = "{azure-open-ai-deployment-model-id}"; + String fileName = "JP_it_is_rainy_today.wav"; + Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName); + + OpenAIAsyncClient client = new OpenAIClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(azureOpenaiKey)) + .buildAsyncClient(); + byte[] file = BinaryData.fromFile(filePath).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + + client.getAudioTranslation(deploymentOrModelId, fileName, translationOptions) + .subscribe(translation -> { + System.out.println("Translation: " + translation.getText()); + }); + + // The .subscribe() creation and assignment is not a blocking call. For the purpose of this example, we sleep + // the thread so the program does not end before the translation operation is complete. Using .block() instead of + // .subscribe() will turn this into a synchronous call. + TimeUnit.SECONDS.sleep(10); + } +} diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java new file mode 100644 index 000000000000..18a56d967fef --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.openai.usage; + +import com.azure.ai.openai.OpenAIClient; +import com.azure.ai.openai.OpenAIClientBuilder; +import com.azure.ai.openai.models.AudioTranscription; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranslationOptions; +import com.azure.core.credential.AzureKeyCredential; +import com.azure.core.util.BinaryData; + +import java.nio.file.Path; +import java.nio.file.Paths; + +/** + * A sample demonstrates how to translate a given audio file. + */ +public class AudioTranslationSample { + /** + * Runs the sample algorithm and demonstrates how to translate a given audio file. + * + * @param args Unused. Arguments to the program.
+ */ + public static void main(String[] args) { + String azureOpenaiKey = "{azure-open-ai-key}"; + String endpoint = "{azure-open-ai-endpoint}"; + String deploymentOrModelId = "{azure-open-ai-deployment-model-id}"; + String fileName = "JP_it_is_rainy_today.wav"; + Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName); + + OpenAIClient client = new OpenAIClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(azureOpenaiKey)) + .buildClient(); + byte[] file = BinaryData.fromFile(filePath).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription translation = client.getAudioTranslation(deploymentOrModelId, fileName, translationOptions); + + System.out.println("Translation: " + translation.getText()); + } +} diff --git a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAIAsyncClientTest.java b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAIAsyncClientTest.java index 7cc7ec3429c6..fb842b09df8a 100644 --- a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAIAsyncClientTest.java +++ b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAIAsyncClientTest.java @@ -4,6 +4,10 @@ package com.azure.ai.openai; import com.azure.ai.openai.functions.MyFunctionCallArguments; +import com.azure.ai.openai.models.AudioTaskLabel; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.ai.openai.models.AudioTranslationOptions; import com.azure.ai.openai.models.ChatChoice; import com.azure.ai.openai.models.ChatCompletions; import com.azure.ai.openai.models.ChatCompletionsOptions; @@ -25,6 +29,7 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import static com.azure.ai.openai.TestUtils.DISPLAY_NAME_WITH_ARGUMENTS; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -322,4 +327,281 @@ public void testCompletionContentFiltering(HttpClient httpClient, OpenAIServiceV }).verifyComplete(); }); } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.JSON); + + StepVerifier.create(client.getAudioTranscription(deploymentName, fileName, transcriptionOptions)) + .assertNext(transcription -> + assertAudioTranscriptionSimpleJson(transcription, BATMAN_TRANSCRIPTION)) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new 
AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON); + + StepVerifier.create(client.getAudioTranscription(deploymentName, fileName, transcriptionOptions)) + .assertNext(transcription -> + assertAudioTranscriptionVerboseJson(transcription, BATMAN_TRANSCRIPTION, AudioTaskLabel.TRANSCRIBE)) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.TEXT); + + StepVerifier.create(client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions)) + .assertNext(transcription -> + // A plain/text request adds a line break as an artifact. Also observed for translations + assertEquals(BATMAN_TRANSCRIPTION + "\n", transcription)) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.SRT); + + StepVerifier.create(client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions)) + .assertNext(translation -> { + // Sequence number + assertTrue(translation.contains("1\n")); + // First sequence starts at timestamp 0 + assertTrue(translation.contains("00:00:00,000 --> ")); + // Contains at least one expected word + assertTrue(translation.contains("Batman")); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VTT); + + StepVerifier.create(client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions)) + .assertNext(translation -> { + // Start value according to spec + assertTrue(translation.startsWith("WEBVTT\n")); + // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "." 
+ assertTrue(translation.contains("00:00:00.000 --> ")); + // Contains at least one expected word + assertTrue(translation.contains("Batman")); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.JSON, + AudioTranscriptionFormat.VERBOSE_JSON + ); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + transcriptionOptions.setResponseFormat(format); + StepVerifier.create(client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions)) + .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException)); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.TEXT, + AudioTranscriptionFormat.SRT, + AudioTranscriptionFormat.VTT + ); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + transcriptionOptions.setResponseFormat(format); + StepVerifier.create(client.getAudioTranscription(modelId, fileName, transcriptionOptions)) + .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException)); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.JSON); + + StepVerifier.create(client.getAudioTranslation(modelId, fileName, translationOptions)) + .assertNext(translation -> + assertAudioTranscriptionSimpleJson(translation, "It's raining today.")) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON); + + 
StepVerifier.create(client.getAudioTranslation(modelId, fileName, translationOptions)) + .assertNext(translation -> + assertAudioTranscriptionVerboseJson(translation, "It's raining today.", AudioTaskLabel.TRANSLATE)) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.TEXT); + + StepVerifier.create(client.getAudioTranslationText(modelId, fileName, translationOptions)) + .assertNext(translation -> { + assertEquals("It's raining today.\n", translation); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.SRT); + + StepVerifier.create(client.getAudioTranslationText(modelId, fileName, translationOptions)) + .assertNext(translation -> { + // Sequence number + assertTrue(translation.contains("1\n")); + // First sequence starts at timestamp 0 + assertTrue(translation.contains("00:00:00,000 --> ")); + // Actual translation value + assertTrue(translation.contains("It's raining today.")); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.VTT); + + StepVerifier.create(client.getAudioTranslationText(modelId, fileName, translationOptions)) + .assertNext(translation -> { + // Start value according to spec + assertTrue(translation.startsWith("WEBVTT\n")); + // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "." 
+ assertTrue(translation.contains("00:00:00.000 --> ")); + // Actual translation value + assertTrue(translation.contains("It's raining today.")); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.JSON, + AudioTranscriptionFormat.VERBOSE_JSON + ); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + translationOptions.setResponseFormat(format); + StepVerifier.create(client.getAudioTranslationText(modelId, fileName, translationOptions)) + .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException)); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.TEXT, + AudioTranscriptionFormat.SRT, + AudioTranscriptionFormat.VTT + ); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + translationOptions.setResponseFormat(format); + StepVerifier.create(client.getAudioTranslation(modelId, fileName, translationOptions)) + .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException)); + } + }); + } } diff --git a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAISyncClientTest.java b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAISyncClientTest.java index 43af2bf9bc43..25055df90aa7 100644 --- a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAISyncClientTest.java +++ b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAISyncClientTest.java @@ -4,6 +4,11 @@ package com.azure.ai.openai; import com.azure.ai.openai.functions.MyFunctionCallArguments; +import com.azure.ai.openai.models.AudioTaskLabel; +import com.azure.ai.openai.models.AudioTranscription; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.ai.openai.models.AudioTranslationOptions; import com.azure.ai.openai.models.ChatChoice; import com.azure.ai.openai.models.ChatCompletions; import com.azure.ai.openai.models.ChatCompletionsOptions; @@ -25,6 +30,7 @@ import org.junit.jupiter.params.provider.MethodSource; import java.util.Arrays; +import java.util.List; import static com.azure.ai.openai.TestUtils.DISPLAY_NAME_WITH_ARGUMENTS; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -274,4 +280,265 @@ public void testCompletionContentFiltering(HttpClient httpClient, OpenAIServiceV assertNull(completions.getChoices().get(0).getContentFilterResults()); }); } + + @ParameterizedTest(name = 
DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription transcription = client.getAudioTranscription(modelId, fileName, transcriptionOptions); + assertAudioTranscriptionSimpleJson(transcription, BATMAN_TRANSCRIPTION); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON); + + AudioTranscription transcription = client.getAudioTranscription(modelId, fileName, transcriptionOptions); + assertAudioTranscriptionVerboseJson(transcription, BATMAN_TRANSCRIPTION, AudioTaskLabel.TRANSCRIBE); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.TEXT); + + String transcription = client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions); + // A plain/text request adds a line break as an artifact. 
Also observed for translations + assertEquals(BATMAN_TRANSCRIPTION + "\n", transcription); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.SRT); + + String transcription = client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions); + // Sequence number + assertTrue(transcription.contains("1\n")); + // First sequence starts at timestamp 0 + assertTrue(transcription.contains("00:00:00,000 --> ")); + // Contains one expected word + assertTrue(transcription.contains("Batman")); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VTT); + + String transcription = client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions); + // Start value according to spec + assertTrue(transcription.startsWith("WEBVTT\n")); + // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "." 
+ assertTrue(transcription.contains("00:00:00.000 --> ")); + // Contains at least one expected word + assertTrue(transcription.contains("Batman")); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.JSON, + AudioTranscriptionFormat.VERBOSE_JSON + ); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + transcriptionOptions.setResponseFormat(format); + assertThrows(IllegalArgumentException.class, () -> { + client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions); + }); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.TEXT, + AudioTranscriptionFormat.SRT, + AudioTranscriptionFormat.VTT + ); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + transcriptionOptions.setResponseFormat(format); + assertThrows(IllegalArgumentException.class, () -> { + client.getAudioTranscription(modelId, fileName, transcriptionOptions); + }); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription translation = client.getAudioTranslation(modelId, fileName, translationOptions); + assertAudioTranscriptionSimpleJson(translation, "It's raining today."); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON); + + AudioTranscription translation = client.getAudioTranslation(modelId, fileName, translationOptions); + assertAudioTranscriptionVerboseJson(translation, "It's raining today.", 
AudioTaskLabel.TRANSLATE); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.TEXT); + + String transcription = client.getAudioTranslationText(modelId, fileName, translationOptions); + assertEquals("It's raining today.\n", transcription); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.SRT); + + String transcription = client.getAudioTranslationText(modelId, fileName, translationOptions); + // Sequence number + assertTrue(transcription.contains("1\n")); + // First sequence starts at timestamp 0 + assertTrue(transcription.contains("00:00:00,000 --> ")); + // Actual translation value + assertTrue(transcription.contains("It's raining today.")); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.VTT); + + String transcription = client.getAudioTranslationText(modelId, fileName, translationOptions); + // Start value according to spec + assertTrue(transcription.startsWith("WEBVTT\n")); + // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "." 
+ assertTrue(transcription.contains("00:00:00.000 --> ")); + // Actual translation value + assertTrue(transcription.contains("It's raining today.")); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.JSON, + AudioTranscriptionFormat.VERBOSE_JSON + ); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + translationOptions.setResponseFormat(format); + assertThrows(IllegalArgumentException.class, () -> { + client.getAudioTranslationText(modelId, fileName, translationOptions); + }); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.TEXT, + AudioTranscriptionFormat.SRT, + AudioTranscriptionFormat.VTT + ); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + translationOptions.setResponseFormat(format); + assertThrows(IllegalArgumentException.class, () -> { + client.getAudioTranslation(modelId, fileName, translationOptions); + }); + } + }); + } } diff --git a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIAsyncClientTest.java b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIAsyncClientTest.java index 0c99aa4a6fb4..44987bd6c7c4 100644 --- a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIAsyncClientTest.java +++ b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIAsyncClientTest.java @@ -4,6 +4,10 @@ package com.azure.ai.openai; import com.azure.ai.openai.functions.MyFunctionCallArguments; +import com.azure.ai.openai.models.AudioTaskLabel; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.ai.openai.models.AudioTranslationOptions; import com.azure.ai.openai.models.AzureChatExtensionConfiguration; import com.azure.ai.openai.models.AzureChatExtensionType; import com.azure.ai.openai.models.AzureCognitiveSearchChatExtensionConfiguration; @@ -31,13 +35,10 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; +import java.util.List; import static com.azure.ai.openai.TestUtils.DISPLAY_NAME_WITH_ARGUMENTS; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertInstanceOf; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; public class OpenAIAsyncClientTest extends OpenAIClientTestBase { 
private OpenAIAsyncClient client; @@ -294,7 +295,7 @@ public void testChatFunctionNotSuppliedByNamePreset(HttpClient httpClient, OpenA @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testChatCompletionContentFiltering(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIAsyncClient(httpClient, serviceVersion); + client = getOpenAIAsyncClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getChatCompletionsContentFilterRunner((modelId, chatMessages) -> { StepVerifier.create(client.getChatCompletions(modelId, new ChatCompletionsOptions(chatMessages))) .assertNext(chatCompletions -> { @@ -310,7 +311,7 @@ public void testChatCompletionContentFiltering(HttpClient httpClient, OpenAIServ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testChatCompletionStreamContentFiltering(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIAsyncClient(httpClient, serviceVersion); + client = getOpenAIAsyncClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getChatCompletionsContentFilterRunner((modelId, chatMessages) -> { StepVerifier.create(client.getChatCompletionsStream(modelId, new ChatCompletionsOptions(chatMessages))) .recordWith(ArrayList::new) @@ -362,7 +363,7 @@ public void testChatCompletionStreamContentFiltering(HttpClient httpClient, Open @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testCompletionContentFiltering(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIAsyncClient(httpClient, serviceVersion); + client = getOpenAIAsyncClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getCompletionsContentFilterRunner((modelId, prompt) -> { CompletionsOptions completionsOptions = new CompletionsOptions(Arrays.asList(prompt)); // work around for this model, there seem to be some issues with Completions in gpt-turbo models @@ -380,7 +381,7 @@ public void testCompletionContentFiltering(HttpClient httpClient, OpenAIServiceV @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testCompletionStreamContentFiltering(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIAsyncClient(httpClient, serviceVersion); + client = getOpenAIAsyncClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getCompletionsContentFilterRunner((modelId, prompt) -> { CompletionsOptions completionsOptions = new CompletionsOptions(Arrays.asList(prompt)); // work around for this model, there seem to be some issues with Completions in gpt-turbo models @@ -427,7 +428,7 @@ public void testCompletionStreamContentFiltering(HttpClient httpClient, OpenAISe @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testChatCompletionsBasicSearchExtension(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIAsyncClient(httpClient, serviceVersion); + client = getOpenAIAsyncClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getChatCompletionsAzureChatSearchRunner((deploymentName, chatCompletionsOptions) -> { AzureCognitiveSearchChatExtensionConfiguration cognitiveSearchConfiguration = @@ -452,7 +453,7 @@ public void 
testChatCompletionsBasicSearchExtension(HttpClient httpClient, OpenA @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testChatCompletionsStreamingBasicSearchExtension(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIAsyncClient(httpClient, serviceVersion); + client = getOpenAIAsyncClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getChatCompletionsAzureChatSearchRunner((deploymentName, chatCompletionsOptions) -> { AzureCognitiveSearchChatExtensionConfiguration cognitiveSearchConfiguration = @@ -475,4 +476,281 @@ public void testChatCompletionsStreamingBasicSearchExtension(HttpClient httpClie .verifyComplete(); }); } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.JSON); + + StepVerifier.create(client.getAudioTranscription(deploymentName, fileName, transcriptionOptions)) + .assertNext(transcription -> + assertAudioTranscriptionSimpleJson(transcription, BATMAN_TRANSCRIPTION)) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON); + + StepVerifier.create(client.getAudioTranscription(deploymentName, fileName, transcriptionOptions)) + .assertNext(transcription -> + assertAudioTranscriptionVerboseJson(transcription, BATMAN_TRANSCRIPTION, AudioTaskLabel.TRANSCRIBE)) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.TEXT); + + StepVerifier.create(client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions)) + .assertNext(transcription -> + // A plain/text request adds a line break as an artifact. 
Also observed for translations + assertEquals(BATMAN_TRANSCRIPTION + "\n", transcription)) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.SRT); + + StepVerifier.create(client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions)) + .assertNext(translation -> { + // 1st Sequence number + assertTrue(translation.contains("1\n")); + // First sequence starts at timestamp 0 + assertTrue(translation.contains("00:00:00,000 --> ")); + // Transcription contains at least one expected word + assertTrue(translation.contains("Batman")); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VTT); + + StepVerifier.create(client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions)) + .assertNext(translation -> { + // Start value according to spec + assertTrue(translation.startsWith("WEBVTT\n")); + // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "." 
+ assertTrue(translation.contains("00:00:00.000 --> ")); + // Transcription contains at least one expected word + assertTrue(translation.contains("Batman")); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.JSON, + AudioTranscriptionFormat.VERBOSE_JSON + ); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + transcriptionOptions.setResponseFormat(format); + StepVerifier.create(client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions)) + .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException)); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.TEXT, + AudioTranscriptionFormat.SRT, + AudioTranscriptionFormat.VTT + ); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + transcriptionOptions.setResponseFormat(format); + StepVerifier.create(client.getAudioTranscription(deploymentName, fileName, transcriptionOptions)) + .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException)); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.JSON); + + StepVerifier.create(client.getAudioTranslation(deploymentName, fileName, translationOptions)) + .assertNext(translation -> + assertAudioTranscriptionSimpleJson(translation, "It's raining today.")) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + 
translationOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON); + + StepVerifier.create(client.getAudioTranslation(deploymentName, fileName, translationOptions)) + .assertNext(translation -> + assertAudioTranscriptionVerboseJson(translation, "It's raining today.", AudioTaskLabel.TRANSLATE)) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.TEXT); + + StepVerifier.create(client.getAudioTranslationText(deploymentName, fileName, translationOptions)) + .assertNext(translation -> { + assertEquals("It's raining today.\n", translation); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.SRT); + + StepVerifier.create(client.getAudioTranslationText(deploymentName, fileName, translationOptions)) + .assertNext(translation -> { + // Sequence number + assertTrue(translation.contains("1\n")); + // First sequence starts at timestamp 0 + assertTrue(translation.contains("00:00:00,000 --> ")); + // Actual translation value + assertTrue(translation.contains("It's raining today.")); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.VTT); + + StepVerifier.create(client.getAudioTranslationText(deploymentName, fileName, translationOptions)) + .assertNext(translation -> { + // Start value according to spec + assertTrue(translation.startsWith("WEBVTT\n")); + // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "." 
+ assertTrue(translation.contains("00:00:00.000 --> ")); + // Actual translation value + assertTrue(translation.contains("It's raining today.")); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.JSON, + AudioTranscriptionFormat.VERBOSE_JSON + ); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + translationOptions.setResponseFormat(format); + StepVerifier.create(client.getAudioTranslationText(deploymentName, fileName, translationOptions)) + .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException)); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.TEXT, + AudioTranscriptionFormat.SRT, + AudioTranscriptionFormat.VTT + ); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + translationOptions.setResponseFormat(format); + StepVerifier.create(client.getAudioTranslation(deploymentName, fileName, translationOptions)) + .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException)); + } + }); + } } diff --git a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTestBase.java b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTestBase.java index 9b8cb0014cd0..a3364ee90bdc 100644 --- a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTestBase.java +++ b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTestBase.java @@ -5,6 +5,8 @@ package com.azure.ai.openai; import com.azure.ai.openai.functions.Parameters; +import com.azure.ai.openai.models.AudioTaskLabel; +import com.azure.ai.openai.models.AudioTranscription; import com.azure.ai.openai.models.AzureChatExtensionsMessageContext; import com.azure.ai.openai.models.ChatChoice; import com.azure.ai.openai.models.ChatCompletions; @@ -26,6 +28,8 @@ import com.azure.core.credential.AzureKeyCredential; import com.azure.core.credential.KeyCredential; import com.azure.core.http.HttpClient; +import com.azure.core.http.policy.HttpLogDetailLevel; +import com.azure.core.http.policy.HttpLogOptions; import com.azure.core.http.rest.Response; import com.azure.core.test.TestMode; import com.azure.core.test.TestProxyTestBase; @@ -35,6 +39,8 @@ import com.azure.core.util.Configuration; import org.junit.jupiter.api.Test; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -55,7 +61,7 @@ public abstract class 
OpenAIClientTestBase extends TestProxyTestBase { OpenAIClientBuilder getOpenAIClientBuilder(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { OpenAIClientBuilder builder = new OpenAIClientBuilder() -// .httpLogOptions(new HttpLogOptions().setLogLevel(HttpLogDetailLevel.BODY_AND_HEADERS)) + .httpLogOptions(new HttpLogOptions().setLogLevel(HttpLogDetailLevel.BODY_AND_HEADERS)) .httpClient(httpClient) .serviceVersion(serviceVersion); @@ -122,7 +128,6 @@ protected String getAzureCognitiveSearchKey() { } } - @Test public abstract void testGetCompletions(HttpClient httpClient, OpenAIServiceVersion serviceVersion); @@ -206,6 +211,22 @@ void getCompletionsContentFilterRunnerForNonAzure(BiConsumer tes testRunner.accept("text-davinci-002", "What is 3 times 4?"); } + void getAudioTranscriptionRunner(BiConsumer testRunner) { + testRunner.accept("whisper-deployment", "batman.wav"); + } + + void getAudioTranslationRunner(BiConsumer testRunner) { + testRunner.accept("whisper-deployment", "JP_it_is_rainy_today.wav"); + } + + void getAudioTranscriptionRunnerForNonAzure(BiConsumer testRunner) { + testRunner.accept("whisper-1", "batman.wav"); + } + + void getAudioTranslationRunnerForNonAzure(BiConsumer testRunner) { + testRunner.accept("whisper-1", "JP_it_is_rainy_today.wav"); + } + private List getChatMessages() { List chatMessages = new ArrayList<>(); chatMessages.add(new ChatMessage(ChatRole.SYSTEM, "You are a helpful assistant. You will talk like a pirate.")); @@ -229,6 +250,10 @@ private ChatCompletionsOptions getChatMessagesWithFunction() { return chatCompletionOptions; } + static Path openTestResourceFile(String fileName) { + return Paths.get("src/test/resources/" + fileName); + } + static void assertCompletions(int choicesPerPrompt, Completions actual) { assertCompletions(choicesPerPrompt, "stop", actual); } @@ -413,4 +438,42 @@ static void assertChatCompletionsStreamingCognitiveSearch(Stream { ChatCompletions chatCompletions = client.getChatCompletions(modelId, new ChatCompletionsOptions(chatMessages)); assertSafeContentFilterResults(chatCompletions.getPromptFilterResults().get(0).getContentFilterResults()); @@ -260,7 +266,7 @@ public void testChatCompletionContentFiltering(HttpClient httpClient, OpenAIServ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testChatCompletionStreamContentFiltering(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIClient(httpClient, serviceVersion); + client = getOpenAIClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getChatCompletionsContentFilterRunner((modelId, chatMessages) -> { IterableStream messageList = client.getChatCompletionsStream(modelId, new ChatCompletionsOptions(chatMessages)); @@ -306,7 +312,7 @@ public void testChatCompletionStreamContentFiltering(HttpClient httpClient, Open @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testCompletionContentFiltering(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIClient(httpClient, serviceVersion); + client = getOpenAIClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getCompletionsContentFilterRunner((modelId, prompt) -> { CompletionsOptions completionsOptions = new CompletionsOptions(Arrays.asList(prompt)); // work around for this model, there seem to be some issues with Completions in gpt-turbo models @@ -358,7 +364,7 @@ public void 
testCompletionStreamContentFiltering(HttpClient httpClient, OpenAISe @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testChatCompletionsBasicSearchExtension(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIClient(httpClient, serviceVersion); + client = getOpenAIClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getChatCompletionsAzureChatSearchRunner((deploymentName, chatCompletionsOptions) -> { AzureCognitiveSearchChatExtensionConfiguration cognitiveSearchConfiguration = @@ -382,7 +388,7 @@ public void testChatCompletionsBasicSearchExtension(HttpClient httpClient, OpenA @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testChatCompletionsStreamingBasicSearchExtension(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIClient(httpClient, serviceVersion); + client = getOpenAIClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getChatCompletionsAzureChatSearchRunner((deploymentName, chatCompletionsOptions) -> { AzureCognitiveSearchChatExtensionConfiguration cognitiveSearchConfiguration = @@ -402,4 +408,263 @@ public void testChatCompletionsStreamingBasicSearchExtension(HttpClient httpClie assertChatCompletionsStreamingCognitiveSearch(resultChatCompletions.stream()); }); } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription transcription = client.getAudioTranscription(deploymentName, fileName, transcriptionOptions); + assertAudioTranscriptionSimpleJson(transcription, BATMAN_TRANSCRIPTION); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON); + + AudioTranscription transcription = client.getAudioTranscription(deploymentName, fileName, transcriptionOptions); + assertAudioTranscriptionVerboseJson(transcription, BATMAN_TRANSCRIPTION, AudioTaskLabel.TRANSCRIBE); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions 
= new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.TEXT); + + String transcription = client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions); + // A plain/text request adds a line break as an artifact. Also observed for translations + assertEquals(BATMAN_TRANSCRIPTION + "\n", transcription); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.SRT); + + String transcription = client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions); + // Contains at least one sequence + assertTrue(transcription.contains("1\n")); + // First sequence starts at timestamp 0 + assertTrue(transcription.contains("00:00:00,000 --> ")); + // Contains at least one expected word + assertTrue(transcription.contains("Batman")); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VTT); + + String transcription = client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions); + // Start value according to spec + assertTrue(transcription.startsWith("WEBVTT\n")); + // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "." 
+ assertTrue(transcription.contains("00:00:00.000 --> ")); + // Contains at least one expected word in the transcription + assertTrue(transcription.contains("Batman")); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.JSON, + AudioTranscriptionFormat.VERBOSE_JSON + ); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions audioTranscriptionOptions = new AudioTranscriptionOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + audioTranscriptionOptions.setResponseFormat(format); + assertThrows(IllegalArgumentException.class, () -> + client.getAudioTranscriptionText(deploymentName, fileName, audioTranscriptionOptions)); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.TEXT, + AudioTranscriptionFormat.SRT, + AudioTranscriptionFormat.VTT + ); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions audioTranscriptionOptions = new AudioTranscriptionOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + audioTranscriptionOptions.setResponseFormat(format); + assertThrows(IllegalArgumentException.class, () -> + client.getAudioTranscription(deploymentName, fileName, audioTranscriptionOptions)); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription translation = client.getAudioTranslation(deploymentName, fileName, translationOptions); + assertAudioTranscriptionSimpleJson(translation, "It's raining today."); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON); + + AudioTranscription translation = client.getAudioTranslation(deploymentName, fileName, translationOptions); + 
assertAudioTranscriptionVerboseJson(translation, "It's raining today.", AudioTaskLabel.TRANSLATE); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.TEXT); + + String transcription = client.getAudioTranslationText(deploymentName, fileName, translationOptions); + assertEquals("It's raining today.\n", transcription); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.SRT); + + String transcription = client.getAudioTranslationText(deploymentName, fileName, translationOptions); + // Sequence number + assertTrue(transcription.contains("1\n")); + // First sequence starts at timestamp 0 + assertTrue(transcription.contains("00:00:00,000 --> ")); + // Actual translation value + assertTrue(transcription.contains("It's raining today.")); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.VTT); + + String transcription = client.getAudioTranslationText(deploymentName, fileName, translationOptions); + // Start value according to spec + assertTrue(transcription.startsWith("WEBVTT\n")); + // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "." 
+ assertTrue(transcription.contains("00:00:00.000 --> ")); + // Actual translation value + assertTrue(transcription.contains("It's raining today.")); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.JSON, + AudioTranscriptionFormat.VERBOSE_JSON + ); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + translationOptions.setResponseFormat(format); + assertThrows(IllegalArgumentException.class, () -> { + client.getAudioTranslationText(deploymentName, fileName, translationOptions); + }); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.TEXT, + AudioTranscriptionFormat.SRT, + AudioTranscriptionFormat.VTT + ); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + translationOptions.setResponseFormat(format); + assertThrows(IllegalArgumentException.class, () -> { + client.getAudioTranslation(deploymentName, fileName, translationOptions); + }); + } + }); + } } diff --git a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/implementation/MultipartDataHelperTest.java b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/implementation/MultipartDataHelperTest.java new file mode 100644 index 000000000000..be4fdadbe97e --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/implementation/MultipartDataHelperTest.java @@ -0,0 +1,132 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.ai.openai.implementation; + +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.ai.openai.models.AudioTranslationOptions; +import com.azure.ai.openai.models.EmbeddingsOptions; +import org.junit.jupiter.api.Test; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +/** + * Unit tests for {@link MultipartDataHelper} + */ +public class MultipartDataHelperTest { + + private static final String TEST_BOUNDARY = "test-boundary"; + + @Test + public void serializeAudioTranslationOptionsAllFields() { + MultipartDataHelper helper = new MultipartDataHelper(TEST_BOUNDARY); + byte[] file = new byte[] {73, 32, 115, 104, 111, 117, 108, 100, 32, 104, 97, 118, 101, 32, 116, 104, 111, 117, + 103, 104, 116, 32, 111, 102, 32, 97, 32, 103, 111, 111, 100, 32, 101, 97, 115, 116, 101, 114, 32, 101, + 103, 103}; + String fileName = "file_name.wav"; + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setModel("model_name") + .setPrompt("prompt text") + .setResponseFormat(AudioTranscriptionFormat.TEXT) + .setTemperature(0.1); + MultipartDataSerializationResult actual = helper.serializeRequest(translationOptions, fileName); + + String expected = multipartFileSegment(fileName, file) + + fieldFormData("response_format", "text") + + fieldFormData("model", "model_name") + + fieldFormData("prompt", "prompt text") + + fieldFormData("temperature", "0.1") + + closingMarker(); + + assertEquals(expected, actual.getData().toString()); + assertEquals(expected.getBytes(StandardCharsets.US_ASCII).length, actual.getDataLength()); + } + + @Test + public void serializeAudioTranscriptionOptionsAllFields() { + MultipartDataHelper helper = new MultipartDataHelper(TEST_BOUNDARY); + byte[] file = new byte[] {73, 32, 115, 104, 111, 117, 108, 100, 32, 104, 97, 118, 101, 32, 116, 104, 111, 117, + 103, 104, 116, 32, 111, 102, 32, 97, 32, 103, 111, 111, 100, 32, 101, 97, 115, 116, 101, 114, 32, 101, + 103, 103}; + String fileName = "file_name.wav"; + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setModel("model_name") + .setPrompt("prompt text") + .setResponseFormat(AudioTranscriptionFormat.TEXT) + .setLanguage("en") + .setTemperature(0.1); + MultipartDataSerializationResult actual = helper.serializeRequest(transcriptionOptions, fileName); + + String expected = multipartFileSegment(fileName, file) + + fieldFormData("response_format", "text") + + fieldFormData("model", "model_name") + + fieldFormData("prompt", "prompt text") + + fieldFormData("temperature", "0.1") + + fieldFormData("language", "en") + + closingMarker(); + + assertEquals(expected, actual.getData().toString()); + assertEquals(expected.getBytes(StandardCharsets.US_ASCII).length, actual.getDataLength()); + } + + @Test + public void serializeAudioTranslationOptionsNoFields() { + MultipartDataHelper helper = new MultipartDataHelper(TEST_BOUNDARY); + byte[] file = new byte[] {}; + String fileName = "file_name.wav"; + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + MultipartDataSerializationResult actual = helper.serializeRequest(translationOptions, fileName); + + String expected = multipartFileSegment(fileName, file) + + closingMarker(); + + assertEquals(expected, 
actual.getData().toString()); + assertEquals(expected.getBytes(StandardCharsets.US_ASCII).length, actual.getDataLength()); + } + + @Test + public void serializeAudioTranscriptionOptionsNoFields() { + MultipartDataHelper helper = new MultipartDataHelper(TEST_BOUNDARY); + byte[] file = new byte[] {}; + String fileName = "file_name.wav"; + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + MultipartDataSerializationResult actual = helper.serializeRequest(transcriptionOptions, fileName); + + String expected = multipartFileSegment(fileName, file) + + closingMarker(); + + assertEquals(expected, actual.getData().toString()); + assertEquals(expected.getBytes(StandardCharsets.US_ASCII).length, actual.getDataLength()); + } + + @Test + public void serializeUnsupportedType() { + assertThrows(IllegalArgumentException.class, () -> { + MultipartDataHelper helper = new MultipartDataHelper(TEST_BOUNDARY); + EmbeddingsOptions embeddingsOptions = new EmbeddingsOptions(new ArrayList<>()); + helper.serializeRequest(embeddingsOptions, "path/to/file"); + }); + } + + private static String fieldFormData(String fieldName, String fieldValue) { + return "\r\n--test-boundary" + + "\r\nContent-Disposition: form-data; name=\"" + fieldName + "\"\r\n\r\n" + + fieldValue; + } + + private static String multipartFileSegment(String fileName, byte[] fileBytes) { + return "--test-boundary\r\n" + + "Content-Disposition: form-data; name=\"file\"; filename=\"" + fileName + "\"\r\n" + + "Content-Type: application/octet-stream\r\n\r\n" + + new String(fileBytes, StandardCharsets.US_ASCII); + } + + private static String closingMarker() { + return "\r\n--test-boundary--"; + } +} diff --git a/sdk/openai/azure-ai-openai/src/test/resources/JP_it_is_rainy_today.wav b/sdk/openai/azure-ai-openai/src/test/resources/JP_it_is_rainy_today.wav new file mode 100644 index 000000000000..5970c85ec1cd Binary files /dev/null and b/sdk/openai/azure-ai-openai/src/test/resources/JP_it_is_rainy_today.wav differ diff --git a/sdk/openai/azure-ai-openai/src/test/resources/batman.wav b/sdk/openai/azure-ai-openai/src/test/resources/batman.wav new file mode 100644 index 000000000000..4c0b7248a39c Binary files /dev/null and b/sdk/openai/azure-ai-openai/src/test/resources/batman.wav differ diff --git a/sdk/openai/azure-ai-openai/tsp-location.yaml b/sdk/openai/azure-ai-openai/tsp-location.yaml index 368074679599..bc4052dd97e4 100644 --- a/sdk/openai/azure-ai-openai/tsp-location.yaml +++ b/sdk/openai/azure-ai-openai/tsp-location.yaml @@ -1,5 +1,5 @@ directory: specification/cognitiveservices/OpenAI.Inference additionalDirectories: - specification/cognitiveservices/OpenAI.Authoring -commit: b646a42aa3b7a0ce488d05f1724827ea41d12cf1 +commit: dd2d1e8957ac6654272137e8d5874eacafd80a5f repo: Azure/azure-rest-api-specs
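The `WrongFormats` tests in both the async and sync suites expect an `IllegalArgumentException` whenever a JSON-style response format is passed to the text-returning overloads, or a text-style format to the object-returning ones. The client-side check itself is not part of this diff, so the sketch below is only a guess at the kind of guard those tests imply; the class and method names are hypothetical.

```java
// Hypothetical guard illustrating the behavior the wrong-format tests assert;
// not the actual client code, which is outside this diff.
import com.azure.ai.openai.models.AudioTranscriptionFormat;

import java.util.Arrays;
import java.util.List;

final class AudioFormatGuard {
    // Formats whose payload deserializes into an AudioTranscription object.
    private static final List<AudioTranscriptionFormat> JSON_FORMATS = Arrays.asList(
        AudioTranscriptionFormat.JSON, AudioTranscriptionFormat.VERBOSE_JSON);
    // Formats returned verbatim as a plain string.
    private static final List<AudioTranscriptionFormat> TEXT_FORMATS = Arrays.asList(
        AudioTranscriptionFormat.TEXT, AudioTranscriptionFormat.SRT, AudioTranscriptionFormat.VTT);

    static void validateForJsonOverload(AudioTranscriptionFormat format) {
        if (!JSON_FORMATS.contains(format)) {
            throw new IllegalArgumentException(
                "This overload only accepts JSON or VERBOSE_JSON response formats, got: " + format);
        }
    }

    static void validateForTextOverload(AudioTranscriptionFormat format) {
        if (!TEXT_FORMATS.contains(format)) {
            throw new IllegalArgumentException(
                "This overload only accepts TEXT, SRT or VTT response formats, got: " + format);
        }
    }
}
```

A caller hitting this exception would switch to the matching overload: `getAudioTranscription`/`getAudioTranslation` for JSON and VERBOSE_JSON, or `getAudioTranscriptionText`/`getAudioTranslationText` for TEXT, SRT, and VTT.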