diff --git a/sdk/openai/azure-ai-openai/README.md b/sdk/openai/azure-ai-openai/README.md index 2314af4a23fb..d44822bea896 100644 --- a/sdk/openai/azure-ai-openai/README.md +++ b/sdk/openai/azure-ai-openai/README.md @@ -19,6 +19,8 @@ For concrete examples you can have a look at the following links. Some of the mo * [Streaming chat completions sample](#streaming-chat-completions "Streaming chat completions") * [Embeddings sample](#text-embeddings "Text Embeddings") * [Image Generation sample](#image-generation "Image Generation") +* [Audio Transcription sample](#audio-transcription "Audio Transcription") +* [Audio Translation sample](#audio-translation "Audio Translation") If you want to see the full code for these snippets check out our [samples folder][samples_folder]. @@ -150,6 +152,8 @@ The following sections provide several code snippets covering some of the most c * [Streaming chat completions sample](#streaming-chat-completions "Streaming chat completions") * [Embeddings sample](#text-embeddings "Text Embeddings") * [Image Generation sample](#image-generation "Image Generation") +* [Audio Transcription sample](#audio-transcription "Audio Transcription") +* [Audio Translation sample](#audio-translation "Audio Translation") ### Text completions @@ -286,6 +290,44 @@ for (ImageLocation imageLocation : images.getData()) { For a complete sample example, see sample [Image Generation][sample_image_generation]. +### Audio Transcription +The OpenAI service starts supporting `audio transcription` with the introduction of `Whisper` models. +The following code snippet shows how to use the service to transcribe audio. 
+ +```java readme-sample-audioTranscription +String fileName = "{your-file-name}"; +Path filePath = Paths.get("{your-file-path}" + fileName); + +byte[] file = BinaryData.fromFile(filePath).toBytes(); +AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + +AudioTranscription transcription = client.getAudioTranscription("{deploymentOrModelId}", transcriptionOptions, fileName); + +System.out.println("Transcription: " + transcription.getText()); +``` +For a complete sample example, see sample [Audio Transcription][sample_audio_transcription]. +Please refer to the service documentation for a conceptual discussion of [Whisper][microsoft_docs_whisper_model]. + +### Audio Translation +The OpenAI service starts supporting `audio translation` with the introduction of `Whisper` models. +The following code snippet shows how to use the service to translate audio. + +```java readme-sample-audioTranslation +String fileName = "{your-file-name}"; +Path filePath = Paths.get("{your-file-path}" + fileName); + +byte[] file = BinaryData.fromFile(filePath).toBytes(); +AudioTranslationOptions translationOptions = new AudioTranslationOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + +AudioTranscription translation = client.getAudioTranslation("{deploymentOrModelId}", translationOptions, fileName); + +System.out.println("Translation: " + translation.getText()); +``` +For a complete sample example, see sample [Audio Translation][sample_audio_translation]. +Please refer to the service documentation for a conceptual discussion of [Whisper][microsoft_docs_whisper_model]. + ## Troubleshooting ### Enable client logging You can set the `AZURE_LOG_LEVEL` environment variable to view logging statements made in the client library. 
For @@ -327,6 +369,7 @@ For details on contributing to this repository, see the [contributing guide](htt [logLevels]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/core/azure-core/src/main/java/com/azure/core/util/logging/ClientLogger.java [microsoft_docs_openai_completion]: https://learn.microsoft.com/azure/cognitive-services/openai/how-to/completions [microsoft_docs_openai_embedding]: https://learn.microsoft.com/azure/cognitive-services/openai/concepts/understand-embeddings +[microsoft_docs_whisper_model]: https://learn.microsoft.com/azure/ai-services/openai/whisper-quickstart?tabs=command-line [non_azure_openai_authentication]: https://platform.openai.com/docs/api-reference/authentication [performance_tuning]: https://github.com/Azure/azure-sdk-for-java/wiki/Performance-Tuning [product_documentation]: https://azure.microsoft.com/services/ @@ -342,6 +385,8 @@ For details on contributing to this repository, see the [contributing guide](htt [sample_get_completions_streaming]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetCompletionsStreamSample.java [sample_get_embedding]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetEmbeddingsSample.java [sample_image_generation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetImagesSample.java +[sample_audio_transcription]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java +[sample_audio_translation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java [openai_client_async]: 
https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIAsyncClient.java [openai_client_builder]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClientBuilder.java [openai_client_sync]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClient.java diff --git a/sdk/openai/azure-ai-openai/src/samples/README.md b/sdk/openai/azure-ai-openai/src/samples/README.md index cf37cf05b527..8795b90ba0c2 100644 --- a/sdk/openai/azure-ai-openai/src/samples/README.md +++ b/sdk/openai/azure-ai-openai/src/samples/README.md @@ -28,12 +28,16 @@ Synchronous: - [Chat Completions][sample_get_chat_completions] - [Embeddings][sample_get_embedding] - [Image Generation][sample_image_generation] +- [Audio Transcription][sample_audio_transcription] +- [Audio Translation][sample_audio_translation] Asynchronous: - [Text Completions][async_sample_get_completions] - [Chat Completions][async_sample_get_chat_completions] - [Embeddings][async_sample_get_embedding] - [Image Generation][async_sample_image_generation] +- [Audio Transcription][async_sample_audio_transcription] +- [Audio Translation][async_sample_audio_translation] Cookbook: - [Chat bot][cookbook_chat_bot] @@ -66,11 +70,15 @@ This project welcomes contributions and suggestions. 
Find [more contributing][SD [async_sample_get_chat_completions]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetChatCompletionsAsyncSample.java [async_sample_get_embedding]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetEmbeddingsAsyncSample.java [async_sample_image_generation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetImagesAsyncSample.java +[async_sample_audio_transcription]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionAsyncSample.java +[async_sample_audio_translation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationAsyncSample.java [sample_get_completions]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetCompletionsSample.java [sample_get_chat_completions]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetChatCompletionsSample.java [sample_get_embedding]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetEmbeddingsSample.java [sample_image_generation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetImagesSample.java +[sample_audio_transcription]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java +[sample_audio_translation]: 
https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java [cookbook_chat_bot]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatbotSample.java [cookbook_chat_bot_with_key]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatbotWithKeySample.java diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/impl/ReadmeSamples.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/impl/ReadmeSamples.java index 3384e3cb3e2f..fbc1283c69f6 100644 --- a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/impl/ReadmeSamples.java +++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/impl/ReadmeSamples.java @@ -6,6 +6,10 @@ import com.azure.ai.openai.OpenAIAsyncClient; import com.azure.ai.openai.OpenAIClient; import com.azure.ai.openai.OpenAIClientBuilder; +import com.azure.ai.openai.models.AudioTranscription; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.ai.openai.models.AudioTranslationOptions; import com.azure.ai.openai.models.ChatChoice; import com.azure.ai.openai.models.ChatCompletions; import com.azure.ai.openai.models.ChatCompletionsOptions; @@ -25,11 +29,14 @@ import com.azure.core.credential.TokenCredential; import com.azure.core.http.ProxyOptions; import com.azure.core.models.ResponseError; +import com.azure.core.util.BinaryData; import com.azure.core.util.HttpClientOptions; import com.azure.core.util.IterableStream; import com.azure.identity.DefaultAzureCredentialBuilder; import java.net.InetSocketAddress; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -221,4 +228,34 @@ public void 
imageGeneration() { } // END: readme-sample-imageGeneration } + + public void audioTranscription() { + // BEGIN: readme-sample-audioTranscription + String fileName = "{your-file-name}"; + Path filePath = Paths.get("{your-file-path}" + fileName); + + byte[] file = BinaryData.fromFile(filePath).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription transcription = client.getAudioTranscription("{deploymentOrModelId}", transcriptionOptions, fileName); + + System.out.println("Transcription: " + transcription.getText()); + // END: readme-sample-audioTranscription + } + + public void audioTranslation() { + // BEGIN: readme-sample-audioTranslation + String fileName = "{your-file-name}"; + Path filePath = Paths.get("{your-file-path}" + fileName); + + byte[] file = BinaryData.fromFile(filePath).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription translation = client.getAudioTranslation("{deploymentOrModelId}", translationOptions, fileName); + + System.out.println("Translation: " + translation.getText()); + // END: readme-sample-audioTranslation + } } diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/JP_it_is_rainy_today.wav b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/JP_it_is_rainy_today.wav new file mode 100644 index 000000000000..5970c85ec1cd Binary files /dev/null and b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/JP_it_is_rainy_today.wav differ diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/batman.wav b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/batman.wav new file mode 100644 index 000000000000..4c0b7248a39c Binary files /dev/null and 
b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/batman.wav differ diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionAsyncSample.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionAsyncSample.java new file mode 100644 index 000000000000..af28ba442624 --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionAsyncSample.java @@ -0,0 +1,52 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.openai.usage; + +import com.azure.ai.openai.OpenAIAsyncClient; +import com.azure.ai.openai.OpenAIClientBuilder; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.core.credential.AzureKeyCredential; +import com.azure.core.util.BinaryData; + +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.concurrent.TimeUnit; + +/** + * An asynchronous sample demonstrates how to transcribe a given audio file. + */ +public class AudioTranscriptionAsyncSample { + /** + * Runs the sample algorithm and demonstrates how to transcribe a given audio file. + * + * @param args Unused. Arguments to the program. 
+ */ + public static void main(String[] args) throws InterruptedException { + String azureOpenaiKey = "{azure-open-ai-key}"; + String endpoint = "{azure-open-ai-endpoint}"; + String deploymentOrModelId = "{azure-open-ai-deployment-model-id}"; + String fileName = "batman.wav"; + Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName); + + OpenAIAsyncClient client = new OpenAIClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(azureOpenaiKey)) + .buildAsyncClient(); + + byte[] file = BinaryData.fromFile(filePath).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + + client.getAudioTranscription(deploymentOrModelId, transcriptionOptions, fileName) + .subscribe(transcription -> { + System.out.println("Transcription: " + transcription.getText()); + }); + + // The .subscribe() creation and assignment is not a blocking call. For the purpose of this example, we sleep + // the thread so the program does not end before the send operation is complete. Using .block() instead of + // .subscribe() will turn this into a synchronous call. + TimeUnit.SECONDS.sleep(10); + } +} diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java new file mode 100644 index 000000000000..d9dec1cd620a --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java @@ -0,0 +1,49 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.ai.openai.usage; + +import com.azure.ai.openai.OpenAIClient; +import com.azure.ai.openai.OpenAIClientBuilder; +import com.azure.ai.openai.models.AudioTranscription; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.core.credential.AzureKeyCredential; +import com.azure.core.util.BinaryData; + +import java.nio.file.Path; +import java.nio.file.Paths; + +/** + * A sample demonstrates how to transcribe a given audio file. + */ +public class AudioTranscriptionSample { + /** + * Runs the sample algorithm and demonstrates how to transcribe a given audio file. + * + * @param args Unused. Arguments to the program. + */ + public static void main(String[] args) { + String azureOpenaiKey = "{azure-open-ai-key}"; + String endpoint = "{azure-open-ai-endpoint}"; + String deploymentOrModelId = "{azure-open-ai-deployment-model-id}"; + String fileName = "batman.wav"; + Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName); + + OpenAIClient client = new OpenAIClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(azureOpenaiKey)) + .buildClient(); + + byte[] file = BinaryData.fromFile(filePath).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription transcription = client.getAudioTranscription(deploymentOrModelId, transcriptionOptions, fileName); + + System.out.println("Transcription: " + transcription.getText()); + + } + + +} diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationAsyncSample.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationAsyncSample.java new file mode 100644 index 000000000000..7f1d190bf233 --- /dev/null +++ 
b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationAsyncSample.java @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.openai.usage; + +import com.azure.ai.openai.OpenAIAsyncClient; +import com.azure.ai.openai.OpenAIClientBuilder; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranslationOptions; +import com.azure.core.credential.AzureKeyCredential; +import com.azure.core.util.BinaryData; + +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.concurrent.TimeUnit; + +/** + * An asynchronous sample demonstrates how to translate a given audio file. + */ +public class AudioTranslationAsyncSample { + /** + * Runs the sample algorithm and demonstrates how to translate a given audio file. + * + * @param args Unused. Arguments to the program. + */ + public static void main(String[] args) throws InterruptedException { + String azureOpenaiKey = "{azure-open-ai-key}"; + String endpoint = "{azure-open-ai-endpoint}"; + String deploymentOrModelId = "{azure-open-ai-deployment-model-id}"; + String fileName = "JP_it_is_rainy_today.wav"; + Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName); + + OpenAIAsyncClient client = new OpenAIClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(azureOpenaiKey)) + .buildAsyncClient(); + byte[] file = BinaryData.fromFile(filePath).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + + client.getAudioTranslation(deploymentOrModelId, translationOptions, fileName) + .subscribe(translation -> { + System.out.println("Translation: " + translation.getText()); + }); + + // The .subscribe() creation and assignment is not a blocking call. 
For the purpose of this example, we sleep + // the thread so the program does not end before the send operation is complete. Using .block() instead of + // .subscribe() will turn this into a synchronous call. + TimeUnit.SECONDS.sleep(10); + } +} diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java new file mode 100644 index 000000000000..c50def2081af --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.openai.usage; + +import com.azure.ai.openai.OpenAIClient; +import com.azure.ai.openai.OpenAIClientBuilder; +import com.azure.ai.openai.models.AudioTranscription; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranslationOptions; +import com.azure.core.credential.AzureKeyCredential; +import com.azure.core.util.BinaryData; + +import java.nio.file.Path; +import java.nio.file.Paths; + +/** + * A sample demonstrates how to translate a given audio file. + */ +public class AudioTranslationSample { + /** + * Runs the sample algorithm and demonstrates how to translate a given audio file. + * + * @param args Unused. Arguments to the program. 
+ */ + public static void main(String[] args) { + String azureOpenaiKey = "{azure-open-ai-key}"; + String endpoint = "{azure-open-ai-endpoint}"; + String deploymentOrModelId = "{azure-open-ai-deployment-model-id}"; + String fileName = "JP_it_is_rainy_today.wav"; + Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName); + + OpenAIClient client = new OpenAIClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(azureOpenaiKey)) + .buildClient(); + byte[] file = BinaryData.fromFile(filePath).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription translation = client.getAudioTranslation(deploymentOrModelId, translationOptions, fileName); + + System.out.println("Translation: " + translation.getText()); + } +} diff --git a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTestBase.java b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTestBase.java index 676575ab8571..d7bde0a8b948 100644 --- a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTestBase.java +++ b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTestBase.java @@ -40,6 +40,7 @@ import org.junit.jupiter.api.Test; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -251,7 +252,7 @@ private ChatCompletionsOptions getChatMessagesWithFunction() { } static Path openTestResourceFile(String fileName) { - return Path.of("src/test/resources/" + fileName); + return Paths.get("src/test/resources/" + fileName); } static void assertCompletions(int choicesPerPrompt, Completions actual) {