45 changes: 45 additions & 0 deletions sdk/openai/azure-ai-openai/README.md
@@ -19,6 +19,8 @@ For concrete examples you can have a look at the following links. Some of the mo
* [Streaming chat completions sample](#streaming-chat-completions "Streaming chat completions")
* [Embeddings sample](#text-embeddings "Text Embeddings")
* [Image Generation sample](#image-generation "Image Generation")
* [Audio Transcription sample](#audio-transcription "Audio Transcription")
* [Audio Translation sample](#audio-translation "Audio Translation")

If you want to see the full code for these snippets check out our [samples folder][samples_folder].

@@ -150,6 +152,8 @@ The following sections provide several code snippets covering some of the most c
* [Streaming chat completions sample](#streaming-chat-completions "Streaming chat completions")
* [Embeddings sample](#text-embeddings "Text Embeddings")
* [Image Generation sample](#image-generation "Image Generation")
* [Audio Transcription sample](#audio-transcription "Audio Transcription")
* [Audio Translation sample](#audio-translation "Audio Translation")

### Text completions

@@ -286,6 +290,44 @@ for (ImageLocation imageLocation : images.getData()) {

For a complete example, see the [Image Generation][sample_image_generation] sample.

### Audio Transcription
The OpenAI service now supports `audio transcription` with the introduction of `Whisper` models.
The following code snippet shows how to use the service to transcribe audio.

```java readme-sample-audioTranscription
String fileName = "{your-file-name}";
Path filePath = Paths.get("{your-file-path}" + fileName);

byte[] file = BinaryData.fromFile(filePath).toBytes();
AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file)
.setResponseFormat(AudioTranscriptionFormat.JSON);

AudioTranscription transcription = client.getAudioTranscription("{deploymentOrModelId}", transcriptionOptions, fileName);

System.out.println("Transcription: " + transcription.getText());
```

> **Member:** I was wondering if we should actually have a convenience method encapsulating this bit of logic so that users don't have to repeat the boilerplate each time. WDYT? I think it is good to have the documentation as is, though. Files might not be the only source of `byte[]` that a user may want to use.
>
> **Contributor (author):** Yes, we should have a public API that takes `byte[]` data, which would be a convenience for customers.
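
Until such a convenience overload exists, application code can wrap the boilerplate itself. A minimal sketch of a hypothetical helper (the method name is ours, not part of the SDK) that builds the options directly from a file path:

```java
// Hypothetical application-side helper (not an SDK API): builds transcription options
// straight from a file path so callers don't repeat the BinaryData boilerplate.
static AudioTranscriptionOptions transcriptionOptionsFromFile(Path audioFile) {
    byte[] audioBytes = BinaryData.fromFile(audioFile).toBytes();
    return new AudioTranscriptionOptions(audioBytes)
        .setResponseFormat(AudioTranscriptionFormat.JSON);
}
```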
For a complete example, see the [Audio Transcription][sample_audio_transcription] sample.
Please refer to the service documentation for a conceptual discussion of [Whisper][microsoft_docs_whisper_model].

### Audio Translation
The OpenAI service now supports `audio translation` with the introduction of `Whisper` models.
The following code snippet shows how to use the service to translate audio.

```java readme-sample-audioTranslation
String fileName = "{your-file-name}";
Path filePath = Paths.get("{your-file-path}" + fileName);

byte[] file = BinaryData.fromFile(filePath).toBytes();
AudioTranslationOptions translationOptions = new AudioTranslationOptions(file)
.setResponseFormat(AudioTranscriptionFormat.JSON);

AudioTranscription translation = client.getAudioTranslation("{deploymentOrModelId}", translationOptions, fileName);

System.out.println("Translation: " + translation.getText());
```
For a complete example, see the [Audio Translation][sample_audio_translation] sample.
Please refer to the service documentation for a conceptual discussion of [Whisper][microsoft_docs_whisper_model].
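
Both `getAudioTranscription` and `getAudioTranslation` return an `AudioTranscription` object. If you also want segment-level timestamps, you can request the verbose JSON format instead of plain JSON. A minimal sketch, assuming `AudioTranscriptionFormat.VERBOSE_JSON`, `AudioTranscription#getSegments()`, and `AudioTranscriptionSegment` are available in your SDK version; it reuses `file` and `fileName` from the snippets above:

```java
// Assumption: VERBOSE_JSON and the segment accessors exist in this SDK version.
AudioTranscriptionOptions verboseOptions = new AudioTranscriptionOptions(file)
    .setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON);

AudioTranscription verbose = client.getAudioTranscription("{deploymentOrModelId}", verboseOptions, fileName);
for (AudioTranscriptionSegment segment : verbose.getSegments()) {
    System.out.println(segment.getStart() + " -> " + segment.getEnd() + ": " + segment.getText());
}
```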

## Troubleshooting
### Enable client logging
You can set the `AZURE_LOG_LEVEL` environment variable to view logging statements made in the client library. For
@@ -327,6 +369,7 @@ For details on contributing to this repository, see the [contributing guide](htt
[logLevels]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/core/azure-core/src/main/java/com/azure/core/util/logging/ClientLogger.java
[microsoft_docs_openai_completion]: https://learn.microsoft.com/azure/cognitive-services/openai/how-to/completions
[microsoft_docs_openai_embedding]: https://learn.microsoft.com/azure/cognitive-services/openai/concepts/understand-embeddings
[microsoft_docs_whisper_model]: https://learn.microsoft.com/azure/ai-services/openai/whisper-quickstart?tabs=command-line
[non_azure_openai_authentication]: https://platform.openai.com/docs/api-reference/authentication
[performance_tuning]: https://github.com/Azure/azure-sdk-for-java/wiki/Performance-Tuning
[product_documentation]: https://azure.microsoft.com/services/
@@ -342,6 +385,8 @@ For details on contributing to this repository, see the [contributing guide](htt
[sample_get_completions_streaming]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetCompletionsStreamSample.java
[sample_get_embedding]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetEmbeddingsSample.java
[sample_image_generation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetImagesSample.java
[sample_audio_transcription]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java
[sample_audio_translation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java
[openai_client_async]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIAsyncClient.java
[openai_client_builder]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClientBuilder.java
[openai_client_sync]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClient.java
8 changes: 8 additions & 0 deletions sdk/openai/azure-ai-openai/src/samples/README.md
@@ -28,12 +28,16 @@ Synchronous:
- [Chat Completions][sample_get_chat_completions]
- [Embeddings][sample_get_embedding]
- [Image Generation][sample_image_generation]
- [Audio Transcription][sample_audio_transcription]
- [Audio Translation][sample_audio_translation]

Asynchronous:
- [Text Completions][async_sample_get_completions]
- [Chat Completions][async_sample_get_chat_completions]
- [Embeddings][async_sample_get_embedding]
- [Image Generation][async_sample_image_generation]
- [Audio Transcription][async_sample_audio_transcription]
- [Audio Translation][async_sample_audio_translation]

Cookbook:
- [Chat bot][cookbook_chat_bot]
@@ -66,11 +70,15 @@ This project welcomes contributions and suggestions. Find [more contributing][SD
[async_sample_get_chat_completions]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetChatCompletionsAsyncSample.java
[async_sample_get_embedding]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetEmbeddingsAsyncSample.java
[async_sample_image_generation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetImagesAsyncSample.java
[async_sample_audio_transcription]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionAsyncSample.java
[async_sample_audio_translation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationAsyncSample.java

[sample_get_completions]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetCompletionsSample.java
[sample_get_chat_completions]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetChatCompletionsSample.java
[sample_get_embedding]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetEmbeddingsSample.java
[sample_image_generation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetImagesSample.java
[sample_audio_transcription]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java
[sample_audio_translation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java

[cookbook_chat_bot]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatbotSample.java
[cookbook_chat_bot_with_key]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatbotWithKeySample.java
@@ -6,6 +6,10 @@
import com.azure.ai.openai.OpenAIAsyncClient;
import com.azure.ai.openai.OpenAIClient;
import com.azure.ai.openai.OpenAIClientBuilder;
import com.azure.ai.openai.models.AudioTranscription;
import com.azure.ai.openai.models.AudioTranscriptionFormat;
import com.azure.ai.openai.models.AudioTranscriptionOptions;
import com.azure.ai.openai.models.AudioTranslationOptions;
import com.azure.ai.openai.models.ChatChoice;
import com.azure.ai.openai.models.ChatCompletions;
import com.azure.ai.openai.models.ChatCompletionsOptions;
@@ -25,11 +29,14 @@
import com.azure.core.credential.TokenCredential;
import com.azure.core.http.ProxyOptions;
import com.azure.core.models.ResponseError;
import com.azure.core.util.BinaryData;
import com.azure.core.util.HttpClientOptions;
import com.azure.core.util.IterableStream;
import com.azure.identity.DefaultAzureCredentialBuilder;

import java.net.InetSocketAddress;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -221,4 +228,34 @@ public void imageGeneration() {
}
// END: readme-sample-imageGeneration
}

public void audioTranscription() {
// BEGIN: readme-sample-audioTranscription
String fileName = "{your-file-name}";
Path filePath = Paths.get("{your-file-path}" + fileName);

byte[] file = BinaryData.fromFile(filePath).toBytes();
AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file)
.setResponseFormat(AudioTranscriptionFormat.JSON);

AudioTranscription transcription = client.getAudioTranscription("{deploymentOrModelId}", transcriptionOptions, fileName);

System.out.println("Transcription: " + transcription.getText());
// END: readme-sample-audioTranscription
}

public void audioTranslation() {
// BEGIN: readme-sample-audioTranslation
String fileName = "{your-file-name}";
Path filePath = Paths.get("{your-file-path}" + fileName);

byte[] file = BinaryData.fromFile(filePath).toBytes();
AudioTranslationOptions translationOptions = new AudioTranslationOptions(file)
.setResponseFormat(AudioTranscriptionFormat.JSON);

AudioTranscription translation = client.getAudioTranslation("{deploymentOrModelId}", translationOptions, fileName);

System.out.println("Translation: " + translation.getText());
// END: readme-sample-audioTranslation
}
}
2 binary files not shown.
@@ -0,0 +1,52 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.azure.ai.openai.usage;

import com.azure.ai.openai.OpenAIAsyncClient;
import com.azure.ai.openai.OpenAIClientBuilder;
import com.azure.ai.openai.models.AudioTranscriptionFormat;
import com.azure.ai.openai.models.AudioTranscriptionOptions;
import com.azure.core.credential.AzureKeyCredential;
import com.azure.core.util.BinaryData;

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.concurrent.TimeUnit;

/**
* An asynchronous sample demonstrating how to transcribe a given audio file.
*/
public class AudioTranscriptionAsyncSample {
/**
* Runs the sample algorithm and demonstrates how to transcribe a given audio file.
*
* @param args Unused. Arguments to the program.
*/
public static void main(String[] args) throws InterruptedException {
String azureOpenaiKey = "{azure-open-ai-key}";
String endpoint = "{azure-open-ai-endpoint}";
String deploymentOrModelId = "{azure-open-ai-deployment-model-id}";
String fileName = "batman.wav";
Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName);

OpenAIAsyncClient client = new OpenAIClientBuilder()
.endpoint(endpoint)
.credential(new AzureKeyCredential(azureOpenaiKey))
.buildAsyncClient();

byte[] file = BinaryData.fromFile(filePath).toBytes();
AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file)
.setResponseFormat(AudioTranscriptionFormat.JSON);

client.getAudioTranscription(deploymentOrModelId, transcriptionOptions, fileName)
.subscribe(transcription -> {
System.out.println("Transcription: " + transcription.getText());
});

// The .subscribe() creation and assignment is not a blocking call. For the purpose of this example, we sleep
// the thread so the program does not end before the send operation is complete. Using .block() instead of
// .subscribe() will turn this into a synchronous call.
TimeUnit.SECONDS.sleep(10);
}
}
@@ -0,0 +1,49 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.azure.ai.openai.usage;

import com.azure.ai.openai.OpenAIClient;
import com.azure.ai.openai.OpenAIClientBuilder;
import com.azure.ai.openai.models.AudioTranscription;
import com.azure.ai.openai.models.AudioTranscriptionFormat;
import com.azure.ai.openai.models.AudioTranscriptionOptions;
import com.azure.core.credential.AzureKeyCredential;
import com.azure.core.util.BinaryData;

import java.nio.file.Path;
import java.nio.file.Paths;

/**
* A sample demonstrating how to transcribe a given audio file.
*/
public class AudioTranscriptionSample {
/**
* Runs the sample algorithm and demonstrates how to transcribe a given audio file.
*
* @param args Unused. Arguments to the program.
*/
public static void main(String[] args) {
String azureOpenaiKey = "{azure-open-ai-key}";
String endpoint = "{azure-open-ai-endpoint}";
String deploymentOrModelId = "{azure-open-ai-deployment-model-id}";
String fileName = "batman.wav";
Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName);

OpenAIClient client = new OpenAIClientBuilder()
.endpoint(endpoint)
.credential(new AzureKeyCredential(azureOpenaiKey))
.buildClient();

byte[] file = BinaryData.fromFile(filePath).toBytes();
AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file)
.setResponseFormat(AudioTranscriptionFormat.JSON);

AudioTranscription transcription = client.getAudioTranscription(deploymentOrModelId, transcriptionOptions, fileName);

System.out.println("Transcription: " + transcription.getText());

}


}
@@ -0,0 +1,51 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.azure.ai.openai.usage;

import com.azure.ai.openai.OpenAIAsyncClient;
import com.azure.ai.openai.OpenAIClientBuilder;
import com.azure.ai.openai.models.AudioTranscriptionFormat;
import com.azure.ai.openai.models.AudioTranslationOptions;
import com.azure.core.credential.AzureKeyCredential;
import com.azure.core.util.BinaryData;

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.concurrent.TimeUnit;

/**
* An asynchronous sample demonstrating how to translate a given audio file.
*/
public class AudioTranslationAsyncSample {
/**
* Runs the sample algorithm and demonstrates how to translate a given audio file.
*
* @param args Unused. Arguments to the program.
*/
public static void main(String[] args) throws InterruptedException {
String azureOpenaiKey = "{azure-open-ai-key}";
String endpoint = "{azure-open-ai-endpoint}";
String deploymentOrModelId = "{azure-open-ai-deployment-model-id}";
String fileName = "JP_it_is_rainy_today.wav";
Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName);

OpenAIAsyncClient client = new OpenAIClientBuilder()
.endpoint(endpoint)
.credential(new AzureKeyCredential(azureOpenaiKey))
.buildAsyncClient();
byte[] file = BinaryData.fromFile(filePath).toBytes();
AudioTranslationOptions translationOptions = new AudioTranslationOptions(file)
.setResponseFormat(AudioTranscriptionFormat.JSON);

client.getAudioTranslation(deploymentOrModelId, translationOptions, fileName)
.subscribe(translation -> {
System.out.println("Translation: " + translation.getText());
});

// The .subscribe() creation and assignment is not a blocking call. For the purpose of this example, we sleep
// the thread so the program does not end before the send operation is complete. Using .block() instead of
// .subscribe() will turn this into a synchronous call.
TimeUnit.SECONDS.sleep(10);
}
}
@@ -0,0 +1,45 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.azure.ai.openai.usage;

import com.azure.ai.openai.OpenAIClient;
import com.azure.ai.openai.OpenAIClientBuilder;
import com.azure.ai.openai.models.AudioTranscription;
import com.azure.ai.openai.models.AudioTranscriptionFormat;
import com.azure.ai.openai.models.AudioTranslationOptions;
import com.azure.core.credential.AzureKeyCredential;
import com.azure.core.util.BinaryData;

import java.nio.file.Path;
import java.nio.file.Paths;

/**
* A sample demonstrating how to translate a given audio file.
*/
public class AudioTranslationSample {
/**
* Runs the sample algorithm and demonstrates how to translate a given audio file.
*
* @param args Unused. Arguments to the program.
*/
public static void main(String[] args) {
String azureOpenaiKey = "{azure-open-ai-key}";
String endpoint = "{azure-open-ai-endpoint}";
String deploymentOrModelId = "{azure-open-ai-deployment-model-id}";
String fileName = "JP_it_is_rainy_today.wav";
Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName);

OpenAIClient client = new OpenAIClientBuilder()
.endpoint(endpoint)
.credential(new AzureKeyCredential(azureOpenaiKey))
.buildClient();
byte[] file = BinaryData.fromFile(filePath).toBytes();
AudioTranslationOptions translationOptions = new AudioTranslationOptions(file)
.setResponseFormat(AudioTranscriptionFormat.JSON);

AudioTranscription translation = client.getAudioTranslation(deploymentOrModelId, translationOptions, fileName);

System.out.println("Translation: " + translation.getText());
}
}
@@ -40,6 +40,7 @@
import org.junit.jupiter.api.Test;

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -251,7 +252,7 @@ private ChatCompletionsOptions getChatMessagesWithFunction() {
}

static Path openTestResourceFile(String fileName) {
return Path.of("src/test/resources/" + fileName);
return Paths.get("src/test/resources/" + fileName);
}

static void assertCompletions(int choicesPerPrompt, Completions actual) {