diff --git a/.vscode/cspell.json b/.vscode/cspell.json index 800eaef4b22c..490349022c7a 100644 --- a/.vscode/cspell.json +++ b/.vscode/cspell.json @@ -330,6 +330,7 @@ "Mockito", "Mordor", "mosca", + "mpga", "msal", "msix", "MSRC", diff --git a/eng/code-quality-reports/src/main/resources/spotbugs/spotbugs-exclude.xml b/eng/code-quality-reports/src/main/resources/spotbugs/spotbugs-exclude.xml index 4fd5b0cf37d3..3586a122fa77 100644 --- a/eng/code-quality-reports/src/main/resources/spotbugs/spotbugs-exclude.xml +++ b/eng/code-quality-reports/src/main/resources/spotbugs/spotbugs-exclude.xml @@ -2697,4 +2697,18 @@ + + + + + + + + + + + + + + diff --git a/sdk/openai/azure-ai-openai/CHANGELOG.md b/sdk/openai/azure-ai-openai/CHANGELOG.md index c82b24b6462f..d327aded5c9b 100644 --- a/sdk/openai/azure-ai-openai/CHANGELOG.md +++ b/sdk/openai/azure-ai-openai/CHANGELOG.md @@ -4,6 +4,10 @@ ### Features Added +- Added support for the `Whisper` endpoints. +- Translation and transcription of audio files are now available. +- These features are available in both Azure and non-Azure OpenAI. + ### Breaking Changes ### Bugs Fixed diff --git a/sdk/openai/azure-ai-openai/README.md b/sdk/openai/azure-ai-openai/README.md index 2371d9320b52..b0a33e95d9af 100644 --- a/sdk/openai/azure-ai-openai/README.md +++ b/sdk/openai/azure-ai-openai/README.md @@ -19,6 +19,8 @@ For concrete examples you can have a look at the following links. Some of the mo * [Streaming chat completions sample](#streaming-chat-completions "Streaming chat completions") * [Embeddings sample](#text-embeddings "Text Embeddings") * [Image Generation sample](#image-generation "Image Generation") +* [Audio Transcription sample](#audio-transcription "Audio Transcription") +* [Audio Translation sample](#audio-translation "Audio Translation") If you want to see the full code for these snippets check out our [samples folder][samples_folder]. @@ -150,6 +152,8 @@ The following sections provide several code snippets covering some of the most c * [Streaming chat completions sample](#streaming-chat-completions "Streaming chat completions") * [Embeddings sample](#text-embeddings "Text Embeddings") * [Image Generation sample](#image-generation "Image Generation") +* [Audio Transcription sample](#audio-transcription "Audio Transcription") +* [Audio Translation sample](#audio-translation "Audio Translation") ### Text completions @@ -286,6 +290,44 @@ for (ImageLocation imageLocation : images.getData()) { For a complete sample example, see sample [Image Generation][sample_image_generation]. +### Audio Transcription +The OpenAI service now supports `audio transcription` with the introduction of the `Whisper` models. +The following code snippet shows how to use the service to transcribe audio. + +```java readme-sample-audioTranscription +String fileName = "{your-file-name}"; +Path filePath = Paths.get("{your-file-path}" + fileName); + +byte[] file = BinaryData.fromFile(filePath).toBytes(); +AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + +AudioTranscription transcription = client.getAudioTranscription("{deploymentOrModelId}", fileName, transcriptionOptions); + +System.out.println("Transcription: " + transcription.getText()); +``` +For a complete sample, see [Audio Transcription][sample_audio_transcription]. +Please refer to the service documentation for a conceptual discussion of [Whisper][microsoft_docs_whisper_model].
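The client shown above also exposes a plain-text variant, `getAudioTranscriptionText`, which accepts the `TEXT`, `SRT`, and `VTT` response formats and returns the transcript as a `String` (for example, ready-to-use subtitles). The snippet below is a minimal sketch rather than a shipped sample, assuming the same `client` and file-path placeholders as above:

```java
String fileName = "{your-file-name}";
Path filePath = Paths.get("{your-file-path}" + fileName);

byte[] file = BinaryData.fromFile(filePath).toBytes();
// Request the transcript as SRT subtitles; AudioTranscriptionFormat.TEXT and
// AudioTranscriptionFormat.VTT are the other formats accepted by this method.
AudioTranscriptionOptions srtOptions = new AudioTranscriptionOptions(file)
    .setResponseFormat(AudioTranscriptionFormat.SRT);

String srtTranscript = client.getAudioTranscriptionText("{deploymentOrModelId}", fileName, srtOptions);

System.out.println("SRT transcript: " + srtTranscript);
```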
+ +### Audio Translation +The OpenAI service now supports `audio translation` with the introduction of the `Whisper` models. +The following code snippet shows how to use the service to translate audio. + +```java readme-sample-audioTranslation +String fileName = "{your-file-name}"; +Path filePath = Paths.get("{your-file-path}" + fileName); + +byte[] file = BinaryData.fromFile(filePath).toBytes(); +AudioTranslationOptions translationOptions = new AudioTranslationOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + +AudioTranscription translation = client.getAudioTranslation("{deploymentOrModelId}", fileName, translationOptions); + +System.out.println("Translation: " + translation.getText()); +``` +For a complete sample, see [Audio Translation][sample_audio_translation]. +Please refer to the service documentation for a conceptual discussion of [Whisper][microsoft_docs_whisper_model]. + ## Troubleshooting ### Enable client logging You can set the `AZURE_LOG_LEVEL` environment variable to view logging statements made in the client library. For @@ -327,6 +369,7 @@ For details on contributing to this repository, see the [contributing guide](htt [logLevels]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/core/azure-core/src/main/java/com/azure/core/util/logging/ClientLogger.java [microsoft_docs_openai_completion]: https://learn.microsoft.com/azure/cognitive-services/openai/how-to/completions [microsoft_docs_openai_embedding]: https://learn.microsoft.com/azure/cognitive-services/openai/concepts/understand-embeddings +[microsoft_docs_whisper_model]: https://learn.microsoft.com/azure/ai-services/openai/whisper-quickstart?tabs=command-line [non_azure_openai_authentication]: https://platform.openai.com/docs/api-reference/authentication [performance_tuning]: https://github.com/Azure/azure-sdk-for-java/wiki/Performance-Tuning [product_documentation]: https://azure.microsoft.com/services/ @@ -342,6 +385,8 @@ For details on contributing to this repository, see the [contributing guide](htt [sample_get_completions_streaming]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetCompletionsStreamSample.java [sample_get_embedding]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetEmbeddingsSample.java [sample_image_generation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetImagesSample.java +[sample_audio_transcription]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java +[sample_audio_translation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java [openai_client_async]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIAsyncClient.java [openai_client_builder]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClientBuilder.java [openai_client_sync]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClient.java diff --git a/sdk/openai/azure-ai-openai/assets.json b/sdk/openai/azure-ai-openai/assets.json index 
4a830f321b44..beb6b5b76cff 100644 --- a/sdk/openai/azure-ai-openai/assets.json +++ b/sdk/openai/azure-ai-openai/assets.json @@ -2,5 +2,5 @@ "AssetsRepo": "Azure/azure-sdk-assets", "AssetsRepoPrefixPath": "java", "TagPrefix": "java/openai/azure-ai-openai", - "Tag": "java/openai/azure-ai-openai_57107e7a09" + "Tag": "java/openai/azure-ai-openai_3c34d9f076" } diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIAsyncClient.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIAsyncClient.java index e07549aaa744..aa8e0bea1bf8 100644 --- a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIAsyncClient.java +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIAsyncClient.java @@ -3,10 +3,18 @@ // Code generated by Microsoft (R) AutoRest Code Generator. package com.azure.ai.openai; +import static com.azure.core.util.FluxUtil.monoError; + import com.azure.ai.openai.implementation.CompletionsUtils; +import com.azure.ai.openai.implementation.MultipartDataHelper; +import com.azure.ai.openai.implementation.MultipartDataSerializationResult; import com.azure.ai.openai.implementation.NonAzureOpenAIClientImpl; import com.azure.ai.openai.implementation.OpenAIClientImpl; import com.azure.ai.openai.implementation.OpenAIServerSentEvents; +import com.azure.ai.openai.models.AudioTranscription; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.ai.openai.models.AudioTranslationOptions; import com.azure.ai.openai.models.ChatCompletions; import com.azure.ai.openai.models.ChatCompletionsOptions; import com.azure.ai.openai.models.Completions; @@ -24,12 +32,16 @@ import com.azure.core.exception.HttpResponseException; import com.azure.core.exception.ResourceModifiedException; import com.azure.core.exception.ResourceNotFoundException; +import com.azure.core.http.HttpHeaderName; import com.azure.core.http.rest.RequestOptions; import com.azure.core.http.rest.Response; import com.azure.core.util.BinaryData; import com.azure.core.util.FluxUtil; +import com.azure.core.util.logging.ClientLogger; import com.azure.core.util.polling.PollerFlux; import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; @@ -39,6 +51,8 @@ public final class OpenAIAsyncClient { @Generated private final OpenAIClientImpl serviceClient; + private static final ClientLogger LOGGER = new ClientLogger(OpenAIAsyncClient.class); + private final NonAzureOpenAIClientImpl openAIServiceClient; /** @@ -657,6 +671,18 @@ PollerFlux beginBeginAzureBatchImageGeneration( * violence (Optional): (recursive schema, see violence above) * hate (Optional): (recursive schema, see hate above) * self_harm (Optional): (recursive schema, see self_harm above) + * error (Optional): { + * code: String (Required) + * message: String (Required) + * target: String (Optional) + * details (Optional): [ + * (recursive schema, see above) + * ] + * innererror (Optional): { + * code: String (Optional) + * innererror (Optional): (recursive schema, see innererror above) + * } + * } * } * } * ] @@ -694,4 +720,512 @@ Mono> getChatCompletionsWithAzureExtensionsWithResponse( return this.serviceClient.getChatCompletionsWithAzureExtensionsWithResponseAsync( deploymentOrModelName, chatCompletionsOptions, requestOptions); } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. 
Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

Request Body Schema + * + *

{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+ * + *

Response Body Schema + * + *

{@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response} on + * successful completion of {@link Mono}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranscriptionAsResponseObjectWithResponse( + String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + return this.serviceClient.getAudioTranscriptionAsResponseObjectWithResponseAsync( + deploymentOrModelName, audioTranscriptionOptions, requestOptions); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio file data. Audio will be transcribed in + * the written language corresponding to the language it was spoken in. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param fileName The file name that is represented in the {@code file} field of {@link AudioTranscriptionOptions} + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return {@link AudioTranscription} transcribed text and associated metadata from provided spoken audio data on + * successful completion of {@link Mono}. 
+ */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono getAudioTranscription( + String deploymentOrModelName, String fileName, AudioTranscriptionOptions audioTranscriptionOptions) { + // checking allowed formats for a JSON response + List acceptedFormats = new ArrayList<>(); + acceptedFormats.add(AudioTranscriptionFormat.JSON); + acceptedFormats.add(AudioTranscriptionFormat.VERBOSE_JSON); + if (!acceptedFormats.contains(audioTranscriptionOptions.getResponseFormat())) { + return monoError( + LOGGER, new IllegalArgumentException("This operation does not support the requested audio format")); + } + // embedding the `model` in the request for non-Azure case + if (this.openAIServiceClient != null) { + audioTranscriptionOptions.setModel(deploymentOrModelName); + } + MultipartDataHelper helper = new MultipartDataHelper(); + MultipartDataSerializationResult result = helper.serializeRequest(audioTranscriptionOptions, fileName); + String multipartBoundary = helper.getBoundary(); + RequestOptions requestOptions = new RequestOptions(); + requestOptions + .setHeader(HttpHeaderName.CONTENT_TYPE, "multipart/form-data;" + " boundary=" + multipartBoundary) + .setHeader(HttpHeaderName.CONTENT_LENGTH, String.valueOf(result.getDataLength())); + Mono> response = + openAIServiceClient != null + ? this.openAIServiceClient.getAudioTranscriptionAsResponseObjectWithResponseAsync( + deploymentOrModelName, result.getData(), requestOptions) + : this.serviceClient.getAudioTranscriptionAsResponseObjectWithResponseAsync( + deploymentOrModelName, result.getData(), requestOptions); + return response.map(binaryData -> binaryData.getValue().toObject(AudioTranscription.class)); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio file data. Audio will be transcribed in + * the written language corresponding to the language it was spoken in. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param fileName The file name that is represented in the {@code file} field of {@link AudioTranscriptionOptions} + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return transcribed text and associated metadata from provided spoken audio data on successful completion of + * {@link Mono}. 
+ */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono getAudioTranscriptionText( + String deploymentOrModelName, String fileName, AudioTranscriptionOptions audioTranscriptionOptions) { + // checking allowed formats for a plain text response + List acceptedFormats = new ArrayList<>(); + acceptedFormats.add(AudioTranscriptionFormat.TEXT); + acceptedFormats.add(AudioTranscriptionFormat.VTT); + acceptedFormats.add(AudioTranscriptionFormat.SRT); + if (!acceptedFormats.contains(audioTranscriptionOptions.getResponseFormat())) { + return monoError( + LOGGER, new IllegalArgumentException("This operation does not support the requested audio format")); + } + // embedding the `model` in the request for non-Azure case + if (this.openAIServiceClient != null) { + audioTranscriptionOptions.setModel(deploymentOrModelName); + } + MultipartDataHelper helper = new MultipartDataHelper(); + MultipartDataSerializationResult result = helper.serializeRequest(audioTranscriptionOptions, fileName); + String multipartBoundary = helper.getBoundary(); + RequestOptions requestOptions = new RequestOptions(); + requestOptions + .setHeader(HttpHeaderName.CONTENT_TYPE, "multipart/form-data;" + " boundary=" + multipartBoundary) + .setHeader(HttpHeaderName.CONTENT_LENGTH, String.valueOf(result.getDataLength())); + Mono> response = + openAIServiceClient != null + ? this.openAIServiceClient.getAudioTranscriptionAsPlainTextWithResponseAsync( + deploymentOrModelName, result.getData(), requestOptions) + : this.serviceClient.getAudioTranscriptionAsPlainTextWithResponseAsync( + deploymentOrModelName, result.getData(), requestOptions); + return response.map(binaryData -> binaryData.getValue().toString()); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio file data. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param fileName The file name that is represented in the {@code file} field of {@link AudioTranslationOptions} + * @param audioTranslationOptions The configuration information for an audio translation request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return {@link AudioTranscription} english language transcribed text and associated metadata from provided spoken + * audio file data on successful completion of {@link Mono}. 
+ */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono getAudioTranslation( + String deploymentOrModelName, String fileName, AudioTranslationOptions audioTranslationOptions) { + // checking allowed formats for a JSON response + List acceptedFormats = new ArrayList<>(); + acceptedFormats.add(AudioTranscriptionFormat.JSON); + acceptedFormats.add(AudioTranscriptionFormat.VERBOSE_JSON); + if (!acceptedFormats.contains(audioTranslationOptions.getResponseFormat())) { + return monoError( + LOGGER, new IllegalArgumentException("This operation does not support the requested audio format")); + } + // embedding the `model` in the request for non-Azure case + if (this.openAIServiceClient != null) { + audioTranslationOptions.setModel(deploymentOrModelName); + } + MultipartDataHelper helper = new MultipartDataHelper(); + MultipartDataSerializationResult result = helper.serializeRequest(audioTranslationOptions, fileName); + String multipartBoundary = helper.getBoundary(); + RequestOptions requestOptions = new RequestOptions(); + requestOptions + .setHeader(HttpHeaderName.CONTENT_TYPE, "multipart/form-data;" + " boundary=" + multipartBoundary) + .setHeader(HttpHeaderName.CONTENT_LENGTH, String.valueOf(result.getDataLength())); + Mono> response = + openAIServiceClient != null + ? this.openAIServiceClient.getAudioTranslationAsResponseObjectWithResponseAsync( + deploymentOrModelName, result.getData(), requestOptions) + : this.serviceClient.getAudioTranslationAsResponseObjectWithResponseAsync( + deploymentOrModelName, result.getData(), requestOptions); + return response.map(binaryData -> binaryData.getValue().toObject(AudioTranscription.class)); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio file data. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param fileName The file name that is represented in the {@code file} field of {@link AudioTranslationOptions}. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return english language transcribed text and associated metadata from provided spoken audio file data on + * successful completion of {@link Mono}. 
+ */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono getAudioTranslationText( + String deploymentOrModelName, String fileName, AudioTranslationOptions audioTranslationOptions) { + // checking allowed formats for a plain text response + List acceptedFormats = new ArrayList<>(); + acceptedFormats.add(AudioTranscriptionFormat.TEXT); + acceptedFormats.add(AudioTranscriptionFormat.VTT); + acceptedFormats.add(AudioTranscriptionFormat.SRT); + if (!acceptedFormats.contains(audioTranslationOptions.getResponseFormat())) { + return monoError( + LOGGER, new IllegalArgumentException("This operation does not support the requested audio format")); + } + // embedding the `model` in the request for non-Azure case + if (this.openAIServiceClient != null) { + audioTranslationOptions.setModel(deploymentOrModelName); + } + MultipartDataHelper helper = new MultipartDataHelper(); + MultipartDataSerializationResult result = helper.serializeRequest(audioTranslationOptions, fileName); + String multipartBoundary = helper.getBoundary(); + RequestOptions requestOptions = new RequestOptions(); + requestOptions + .setHeader(HttpHeaderName.CONTENT_TYPE, "multipart/form-data;" + " boundary=" + multipartBoundary) + .setHeader(HttpHeaderName.CONTENT_LENGTH, String.valueOf(result.getDataLength())); + Mono> response = + openAIServiceClient != null + ? this.openAIServiceClient.getAudioTranslationAsPlainTextWithResponseAsync( + deploymentOrModelName, result.getData(), requestOptions) + : this.serviceClient.getAudioTranslationAsPlainTextWithResponseAsync( + deploymentOrModelName, result.getData(), requestOptions); + return response.map(binaryData -> binaryData.getValue().toString()); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

Request Body Schema + * + *

{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+ * + *

Response Body Schema + * + *

{@code
+     * String
+     * }
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response} on + * successful completion of {@link Mono}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranscriptionAsPlainTextWithResponse( + String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + return this.serviceClient.getAudioTranscriptionAsPlainTextWithResponseAsync( + deploymentOrModelName, audioTranscriptionOptions, requestOptions); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

Request Body Schema + * + *

{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+ * + *

Response Body Schema + * + *

{@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response} on successful completion of {@link Mono}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranslationAsResponseObjectWithResponse( + String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + return this.serviceClient.getAudioTranslationAsResponseObjectWithResponseAsync( + deploymentOrModelName, audioTranslationOptions, requestOptions); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

Request Body Schema + * + *

{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+ * + *

Response Body Schema + * + *

{@code
+     * String
+     * }
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response} on successful completion of {@link Mono}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranslationAsPlainTextWithResponse( + String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + return this.serviceClient.getAudioTranslationAsPlainTextWithResponseAsync( + deploymentOrModelName, audioTranslationOptions, requestOptions); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return transcribed text and associated metadata from provided spoken audio data on successful completion of + * {@link Mono}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono getAudioTranscriptionAsResponseObject( + String deploymentOrModelName, AudioTranscriptionOptions audioTranscriptionOptions) { + // Generated convenience method for getAudioTranscriptionAsResponseObjectWithResponse + RequestOptions requestOptions = new RequestOptions(); + return getAudioTranscriptionAsResponseObjectWithResponse( + deploymentOrModelName, BinaryData.fromObject(audioTranscriptionOptions), requestOptions) + .flatMap(FluxUtil::toMono) + .map(protocolMethodData -> protocolMethodData.toObject(AudioTranscription.class)); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. 
+ * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return transcribed text and associated metadata from provided spoken audio data on successful completion of + * {@link Mono}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono getAudioTranscriptionAsPlainText( + String deploymentOrModelName, AudioTranscriptionOptions audioTranscriptionOptions) { + // Generated convenience method for getAudioTranscriptionAsPlainTextWithResponse + RequestOptions requestOptions = new RequestOptions(); + return getAudioTranscriptionAsPlainTextWithResponse( + deploymentOrModelName, BinaryData.fromObject(audioTranscriptionOptions), requestOptions) + .flatMap(FluxUtil::toMono) + .map(protocolMethodData -> protocolMethodData.toObject(String.class)); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return english language transcribed text and associated metadata from provided spoken audio data on successful + * completion of {@link Mono}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono getAudioTranslationAsResponseObject( + String deploymentOrModelName, AudioTranslationOptions audioTranslationOptions) { + // Generated convenience method for getAudioTranslationAsResponseObjectWithResponse + RequestOptions requestOptions = new RequestOptions(); + return getAudioTranslationAsResponseObjectWithResponse( + deploymentOrModelName, BinaryData.fromObject(audioTranslationOptions), requestOptions) + .flatMap(FluxUtil::toMono) + .map(protocolMethodData -> protocolMethodData.toObject(AudioTranscription.class)); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. 
+ * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return english language transcribed text and associated metadata from provided spoken audio data on successful + * completion of {@link Mono}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono getAudioTranslationAsPlainText( + String deploymentOrModelName, AudioTranslationOptions audioTranslationOptions) { + // Generated convenience method for getAudioTranslationAsPlainTextWithResponse + RequestOptions requestOptions = new RequestOptions(); + return getAudioTranslationAsPlainTextWithResponse( + deploymentOrModelName, BinaryData.fromObject(audioTranslationOptions), requestOptions) + .flatMap(FluxUtil::toMono) + .map(protocolMethodData -> protocolMethodData.toObject(String.class)); + } } diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClient.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClient.java index 7e171fc56a0d..5d17c266cddd 100644 --- a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClient.java +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClient.java @@ -4,9 +4,15 @@ package com.azure.ai.openai; import com.azure.ai.openai.implementation.CompletionsUtils; +import com.azure.ai.openai.implementation.MultipartDataHelper; +import com.azure.ai.openai.implementation.MultipartDataSerializationResult; import com.azure.ai.openai.implementation.NonAzureOpenAIClientImpl; import com.azure.ai.openai.implementation.OpenAIClientImpl; import com.azure.ai.openai.implementation.OpenAIServerSentEvents; +import com.azure.ai.openai.models.AudioTranscription; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.ai.openai.models.AudioTranslationOptions; import com.azure.ai.openai.models.ChatCompletions; import com.azure.ai.openai.models.ChatCompletionsOptions; import com.azure.ai.openai.models.Completions; @@ -24,6 +30,7 @@ import com.azure.core.exception.HttpResponseException; import com.azure.core.exception.ResourceModifiedException; import com.azure.core.exception.ResourceNotFoundException; +import com.azure.core.http.HttpHeaderName; import com.azure.core.http.rest.RequestOptions; import com.azure.core.http.rest.Response; import com.azure.core.util.BinaryData; @@ -31,6 +38,8 @@ import com.azure.core.util.logging.ClientLogger; import com.azure.core.util.polling.SyncPoller; import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; import reactor.core.publisher.Flux; /** Initializes a new instance of the synchronous OpenAIClient type. 
*/ @@ -656,6 +665,18 @@ SyncPoller beginBeginAzureBatchImageGeneration( * violence (Optional): (recursive schema, see violence above) * hate (Optional): (recursive schema, see hate above) * self_harm (Optional): (recursive schema, see self_harm above) + * error (Optional): { + * code: String (Required) + * message: String (Required) + * target: String (Optional) + * details (Optional): [ + * (recursive schema, see above) + * ] + * innererror (Optional): { + * code: String (Optional) + * innererror (Optional): (recursive schema, see innererror above) + * } + * } * } * } * ] @@ -693,4 +714,503 @@ Response getChatCompletionsWithAzureExtensionsWithResponse( return this.serviceClient.getChatCompletionsWithAzureExtensionsWithResponse( deploymentOrModelName, chatCompletionsOptions, requestOptions); } + + /** + * Gets transcribed text and associated metadata from provided spoken audio file data. Audio will be transcribed in + * the written language corresponding to the language it was spoken in. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param fileName The file name that is represented in the {@code file} field of {@link AudioTranscriptionOptions}. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return {@link AudioTranscription} transcribed text and associated metadata from provided spoken audio data. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public AudioTranscription getAudioTranscription( + String deploymentOrModelName, String fileName, AudioTranscriptionOptions audioTranscriptionOptions) { + // checking allowed formats for a JSON response + List acceptedFormats = new ArrayList<>(); + acceptedFormats.add(AudioTranscriptionFormat.JSON); + acceptedFormats.add(AudioTranscriptionFormat.VERBOSE_JSON); + if (!acceptedFormats.contains(audioTranscriptionOptions.getResponseFormat())) { + throw LOGGER.logExceptionAsError( + new IllegalArgumentException("This operation does not support the requested audio format")); + } + // embedding the `model` in the request for non-Azure case + if (this.openAIServiceClient != null) { + audioTranscriptionOptions.setModel(deploymentOrModelName); + } + MultipartDataHelper helper = new MultipartDataHelper(); + MultipartDataSerializationResult result = helper.serializeRequest(audioTranscriptionOptions, fileName); + String multipartBoundary = helper.getBoundary(); + RequestOptions requestOptions = new RequestOptions(); + requestOptions + .setHeader(HttpHeaderName.CONTENT_TYPE, "multipart/form-data;" + " boundary=" + multipartBoundary) + .setHeader(HttpHeaderName.CONTENT_LENGTH, String.valueOf(result.getDataLength())); + Response response = + openAIServiceClient != null + ? 
this.openAIServiceClient.getAudioTranscriptionAsPlainTextWithResponse( + deploymentOrModelName, result.getData(), requestOptions) + : this.serviceClient.getAudioTranscriptionAsPlainTextWithResponse( + deploymentOrModelName, result.getData(), requestOptions); + return response.getValue().toObject(AudioTranscription.class); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio file data. Audio will be transcribed in + * the written language corresponding to the language it was spoken in. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param fileName The file name that is represented in the {@code file} field of {@link AudioTranscriptionOptions}. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return transcribed text and associated metadata from provided spoken audio data. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public String getAudioTranscriptionText( + String deploymentOrModelName, String fileName, AudioTranscriptionOptions audioTranscriptionOptions) { + // checking allowed formats for a plain text response + List acceptedFormats = new ArrayList<>(); + acceptedFormats.add(AudioTranscriptionFormat.TEXT); + acceptedFormats.add(AudioTranscriptionFormat.VTT); + acceptedFormats.add(AudioTranscriptionFormat.SRT); + if (!acceptedFormats.contains(audioTranscriptionOptions.getResponseFormat())) { + throw LOGGER.logExceptionAsError( + new IllegalArgumentException("This operation does not support the requested audio format")); + } + // embedding the `model` in the request for non-Azure case + if (this.openAIServiceClient != null) { + audioTranscriptionOptions.setModel(deploymentOrModelName); + } + MultipartDataHelper helper = new MultipartDataHelper(); + MultipartDataSerializationResult result = helper.serializeRequest(audioTranscriptionOptions, fileName); + String multipartBoundary = helper.getBoundary(); + RequestOptions requestOptions = new RequestOptions(); + requestOptions + .setHeader(HttpHeaderName.CONTENT_TYPE, "multipart/form-data;" + " boundary=" + multipartBoundary) + .setHeader(HttpHeaderName.CONTENT_LENGTH, String.valueOf(result.getDataLength())); + Response response = + openAIServiceClient != null + ? this.openAIServiceClient.getAudioTranscriptionAsPlainTextWithResponse( + deploymentOrModelName, result.getData(), requestOptions) + : this.serviceClient.getAudioTranscriptionAsPlainTextWithResponse( + deploymentOrModelName, result.getData(), requestOptions); + return response.getValue().toString(); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio file data. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. 
+ * @param fileName The file name that is represented in the {@code file} field of {@link AudioTranslationOptions}. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return {@link AudioTranscription} english language transcribed text and associated metadata from provided spoken + * audio file data. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public AudioTranscription getAudioTranslation( + String deploymentOrModelName, String fileName, AudioTranslationOptions audioTranslationOptions) { + // checking allowed formats for a JSON response + List acceptedFormats = new ArrayList<>(); + acceptedFormats.add(AudioTranscriptionFormat.JSON); + acceptedFormats.add(AudioTranscriptionFormat.VERBOSE_JSON); + if (!acceptedFormats.contains(audioTranslationOptions.getResponseFormat())) { + throw LOGGER.logExceptionAsError( + new IllegalArgumentException("This operation does not support the requested audio format")); + } + // embedding the `model` in the request for non-Azure case + if (this.openAIServiceClient != null) { + audioTranslationOptions.setModel(deploymentOrModelName); + } + MultipartDataHelper helper = new MultipartDataHelper(); + MultipartDataSerializationResult result = helper.serializeRequest(audioTranslationOptions, fileName); + String multipartBoundary = helper.getBoundary(); + RequestOptions requestOptions = new RequestOptions(); + requestOptions + .setHeader(HttpHeaderName.CONTENT_TYPE, "multipart/form-data;" + " boundary=" + multipartBoundary) + .setHeader(HttpHeaderName.CONTENT_LENGTH, String.valueOf(result.getDataLength())); + Response response = + openAIServiceClient != null + ? this.openAIServiceClient.getAudioTranslationAsPlainTextWithResponse( + deploymentOrModelName, result.getData(), requestOptions) + : this.serviceClient.getAudioTranslationAsPlainTextWithResponse( + deploymentOrModelName, result.getData(), requestOptions); + return response.getValue().toObject(AudioTranscription.class); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio file data. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param fileName The file name that is represented in the {@code file} field of {@link AudioTranslationOptions}. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. 
+ * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return english language transcribed text and associated metadata from provided spoken audio file data. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public String getAudioTranslationText( + String deploymentOrModelName, String fileName, AudioTranslationOptions audioTranslationOptions) { + // checking allowed formats for a plain text response + List acceptedFormats = new ArrayList<>(); + acceptedFormats.add(AudioTranscriptionFormat.TEXT); + acceptedFormats.add(AudioTranscriptionFormat.VTT); + acceptedFormats.add(AudioTranscriptionFormat.SRT); + if (!acceptedFormats.contains(audioTranslationOptions.getResponseFormat())) { + throw LOGGER.logExceptionAsError( + new IllegalArgumentException("This operation does not support the requested audio format")); + } + // embedding the `model` in the request for non-Azure case + if (this.openAIServiceClient != null) { + audioTranslationOptions.setModel(deploymentOrModelName); + } + MultipartDataHelper helper = new MultipartDataHelper(); + MultipartDataSerializationResult result = helper.serializeRequest(audioTranslationOptions, fileName); + String multipartBoundary = helper.getBoundary(); + RequestOptions requestOptions = new RequestOptions(); + requestOptions + .setHeader(HttpHeaderName.CONTENT_TYPE, "multipart/form-data;" + " boundary=" + multipartBoundary) + .setHeader(HttpHeaderName.CONTENT_LENGTH, String.valueOf(result.getDataLength())); + Response response = + openAIServiceClient != null + ? this.openAIServiceClient.getAudioTranslationAsPlainTextWithResponse( + deploymentOrModelName, result.getData(), requestOptions) + : this.serviceClient.getAudioTranslationAsPlainTextWithResponse( + deploymentOrModelName, result.getData(), requestOptions); + return response.getValue().toString(); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

Request Body Schema + * + *

{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+ * + *

Response Body Schema + * + *

{@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranscriptionAsResponseObjectWithResponse( + String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + return this.serviceClient.getAudioTranscriptionAsResponseObjectWithResponse( + deploymentOrModelName, audioTranscriptionOptions, requestOptions); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

Request Body Schema + * + *

{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+ * + *

Response Body Schema + * + *

{@code
+     * String
+     * }
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranscriptionAsPlainTextWithResponse( + String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + return this.serviceClient.getAudioTranscriptionAsPlainTextWithResponse( + deploymentOrModelName, audioTranscriptionOptions, requestOptions); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }
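+     *
+     * A minimal usage sketch (illustrative deployment name; {@code fileBytes} holds the source-language audio).
+     * The translated output text is English, and the response shape matches the transcription schema above.
+     *
+     * {@code
+     * AudioTranslationOptions options = new AudioTranslationOptions(fileBytes)
+     *     .setResponseFormat(AudioTranscriptionFormat.JSON);
+     * AudioTranscription translation = client.getAudioTranslationAsResponseObjectWithResponse(
+     *     "my-whisper-deployment", BinaryData.fromObject(options), new RequestOptions())
+     *     .getValue().toObject(AudioTranscription.class);
+     * }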
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranslationAsResponseObjectWithResponse( + String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + return this.serviceClient.getAudioTranslationAsResponseObjectWithResponse( + deploymentOrModelName, audioTranslationOptions, requestOptions); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * String
+     * }
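+     *
+     * A short usage sketch (illustrative names; the VTT constant of the response format enum is assumed, matching
+     * one of the plain-text formats listed in the request schema above):
+     *
+     * {@code
+     * AudioTranslationOptions options = new AudioTranslationOptions(fileBytes)
+     *     .setResponseFormat(AudioTranscriptionFormat.VTT);
+     * String vtt = client.getAudioTranslationAsPlainTextWithResponse(
+     *     "my-whisper-deployment", BinaryData.fromObject(options), new RequestOptions())
+     *     .getValue().toObject(String.class);
+     * }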
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response}. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranslationAsPlainTextWithResponse( + String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + return this.serviceClient.getAudioTranslationAsPlainTextWithResponse( + deploymentOrModelName, audioTranslationOptions, requestOptions); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return transcribed text and associated metadata from provided spoken audio data. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public AudioTranscription getAudioTranscriptionAsResponseObject( + String deploymentOrModelName, AudioTranscriptionOptions audioTranscriptionOptions) { + // Generated convenience method for getAudioTranscriptionAsResponseObjectWithResponse + RequestOptions requestOptions = new RequestOptions(); + return getAudioTranscriptionAsResponseObjectWithResponse( + deploymentOrModelName, BinaryData.fromObject(audioTranscriptionOptions), requestOptions) + .getValue() + .toObject(AudioTranscription.class); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @throws IllegalArgumentException thrown if parameters fail the validation. 
+ * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return transcribed text and associated metadata from provided spoken audio data. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public String getAudioTranscriptionAsPlainText( + String deploymentOrModelName, AudioTranscriptionOptions audioTranscriptionOptions) { + // Generated convenience method for getAudioTranscriptionAsPlainTextWithResponse + RequestOptions requestOptions = new RequestOptions(); + return getAudioTranscriptionAsPlainTextWithResponse( + deploymentOrModelName, BinaryData.fromObject(audioTranscriptionOptions), requestOptions) + .getValue() + .toObject(String.class); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return english language transcribed text and associated metadata from provided spoken audio data. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public AudioTranscription getAudioTranslationAsResponseObject( + String deploymentOrModelName, AudioTranslationOptions audioTranslationOptions) { + // Generated convenience method for getAudioTranslationAsResponseObjectWithResponse + RequestOptions requestOptions = new RequestOptions(); + return getAudioTranslationAsResponseObjectWithResponse( + deploymentOrModelName, BinaryData.fromObject(audioTranslationOptions), requestOptions) + .getValue() + .toObject(AudioTranscription.class); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. 
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return english language transcribed text and associated metadata from provided spoken audio data. + */ + @Generated + @ServiceMethod(returns = ReturnType.SINGLE) + public String getAudioTranslationAsPlainText( + String deploymentOrModelName, AudioTranslationOptions audioTranslationOptions) { + // Generated convenience method for getAudioTranslationAsPlainTextWithResponse + RequestOptions requestOptions = new RequestOptions(); + return getAudioTranslationAsPlainTextWithResponse( + deploymentOrModelName, BinaryData.fromObject(audioTranslationOptions), requestOptions) + .getValue() + .toObject(String.class); + } } diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIServiceVersion.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIServiceVersion.java index 9844431603fa..3027940ba21f 100644 --- a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIServiceVersion.java +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIServiceVersion.java @@ -21,7 +21,10 @@ public enum OpenAIServiceVersion implements ServiceVersion { V2023_07_01_PREVIEW("2023-07-01-preview"), /** Enum value 2023-08-01-preview. */ - V2023_08_01_PREVIEW("2023-08-01-preview"); + V2023_08_01_PREVIEW("2023-08-01-preview"), + + /** Enum value 2023-09-01-preview. */ + V2023_09_01_PREVIEW("2023-09-01-preview"); private final String version; @@ -41,6 +44,6 @@ public String getVersion() { * @return The latest {@link OpenAIServiceVersion}. */ public static OpenAIServiceVersion getLatest() { - return V2023_08_01_PREVIEW; + return V2023_09_01_PREVIEW; } } diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/MultipartDataHelper.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/MultipartDataHelper.java new file mode 100644 index 000000000000..ecad479f6c95 --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/MultipartDataHelper.java @@ -0,0 +1,214 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.openai.implementation; + +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.ai.openai.models.AudioTranslationOptions; +import com.azure.core.util.BinaryData; +import com.azure.core.util.logging.ClientLogger; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +/** + * Helper class for marshaling {@link AudioTranscriptionOptions} and {@link AudioTranslationOptions} objects to be used + * in multipart HTTP requests according to RFC7578. + */ +public class MultipartDataHelper { + private static final ClientLogger LOGGER = new ClientLogger(MultipartDataHelper.class); + + /** + * Value to be used as part of the divider for the multipart requests. + */ + private final String boundary; + + /** + * The actual part separator in the request. This is obtained by prepending "--" to the "boundary". + */ + private final String partSeparator; + + /** + * The marker for the ending of a multipart request. This is obtained by post-pending "--" to the "partSeparator". 
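+     * For example, with the default boundary {@code "29580623-3d02-4a"}, the part separator is
+     * {@code "--29580623-3d02-4a"} and the end marker is {@code "--29580623-3d02-4a--"}.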
+ */ + private final String endMarker; + + /** + * Charset used for encoding the multipart HTTP request. + */ + private final Charset encoderCharset = StandardCharsets.UTF_8; + + /** + * Line separator for the multipart HTTP request. + */ + private static final String CRLF = "\r\n"; + + /** + * Default constructor used in the code. The boundary is a random value. + */ + public MultipartDataHelper() { + // TODO: We can't use randomly generated UUIDs for now. Generating a test session record won't match the + // newly generated UUID for the test run instance this(UUID.randomUUID().toString().substring(0, 16)); + this("29580623-3d02-4a"); + } + + /** + * Constructor accepting a boundary generator. Used for testing. + * + * @param boundary The value to be used as "boundary". + */ + public MultipartDataHelper(String boundary) { + this.boundary = boundary; + partSeparator = "--" + boundary; + endMarker = partSeparator + "--"; + } + + /** + * Gets the "boundary" value. + * + * @return the "boundary" value. + */ + public String getBoundary() { + return boundary; + } + + /** + * This method marshals the passed request into ready to be sent. + * + * @param requestOptions Object to be marshalled for the multipart HTTP request. + * @param fileName The name of the file that is being sent as a part of this request. + * @param {@link AudioTranscriptionOptions} and {@link AudioTranslationOptions} are the only types supported. + * This represents the type information of the request object. + * @return the marshalled data and its length. + */ + public MultipartDataSerializationResult serializeRequest(T requestOptions, String fileName) { + if (requestOptions instanceof AudioTranslationOptions) { + AudioTranslationOptions audioTranslationOptions = (AudioTranslationOptions) requestOptions; + byte[] file = audioTranslationOptions.getFile(); + List fields = formatAudioTranslationOptions(audioTranslationOptions); + return serializeRequestFields(file, fields, fileName); + } else if (requestOptions instanceof AudioTranscriptionOptions) { + AudioTranscriptionOptions audioTranscriptionOptions = (AudioTranscriptionOptions) requestOptions; + byte[] file = audioTranscriptionOptions.getFile(); + List fields = formatAudioTranscriptionOptions(audioTranscriptionOptions); + return serializeRequestFields(file, fields, fileName); + } else { + throw LOGGER.logThrowableAsError(new IllegalArgumentException( + "Only AudioTranslationOptions and AudioTranscriptionOptions currently supported")); + } + } + + /** + * This helper method marshals the passed request fields. + * + * @param file is the byte[] representation of the file in the request object. + * @param fields a list of the members other than the file in the request object. + * @param fileName the name of the file passed in the "file" field of the request object. + * @return a structure containing the marshalled data and its length. 
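+     * As an illustration, for the default boundary, a hypothetical file name of {@code audio-sample.wav}, and a
+     * single {@code response_format} field, the serialized body has the following shape (lines end with CRLF):
+     * {@code
+     * --29580623-3d02-4a
+     * Content-Disposition: form-data; name="file"; filename="audio-sample.wav"
+     * Content-Type: application/octet-stream
+     *
+     * <raw file bytes>
+     * --29580623-3d02-4a
+     * Content-Disposition: form-data; name="response_format"
+     *
+     * verbose_json
+     * --29580623-3d02-4a--
+     * }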
+ */ + private MultipartDataSerializationResult serializeRequestFields(byte[] file, List fields, String fileName) { + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + + // Multipart preamble + String fileFieldPreamble = partSeparator + + CRLF + "Content-Disposition: form-data; name=\"file\"; filename=\"" + + fileName + "\"" + + CRLF + "Content-Type: application/octet-stream" + CRLF + CRLF; + try { + // Writing the file into the request as a byte stream + byteArrayOutputStream.write(fileFieldPreamble.getBytes(encoderCharset)); + byteArrayOutputStream.write(file); + + // Adding other fields to the request + for (MultipartField field : fields) { + byteArrayOutputStream.write(serializeField(field)); + } + byteArrayOutputStream.write((CRLF + endMarker).getBytes(encoderCharset)); + } catch (IOException e) { + throw new RuntimeException(e); + } + + byte[] totalData = byteArrayOutputStream.toByteArray(); + return new MultipartDataSerializationResult(BinaryData.fromBytes(totalData), totalData.length); + } + + /** + * Adds member fields apart from the file to the multipart HTTP request. + * + * @param audioTranslationOptions The configuration information for an audio translation request. + * @return a list of the fields in the request (except for "file"). + */ + private List formatAudioTranslationOptions(AudioTranslationOptions audioTranslationOptions) { + List fields = new ArrayList<>(); + if (audioTranslationOptions.getResponseFormat() != null) { + fields.add(new MultipartField( + "response_format", + audioTranslationOptions.getResponseFormat().toString())); + } + if (audioTranslationOptions.getModel() != null) { + fields.add(new MultipartField("model", + audioTranslationOptions.getModel() + )); + } + if (audioTranslationOptions.getPrompt() != null) { + fields.add(new MultipartField("prompt", + audioTranslationOptions.getPrompt())); + } + if (audioTranslationOptions.getTemperature() != null) { + fields.add(new MultipartField("temperature", + String.valueOf(audioTranslationOptions.getTemperature()))); + } + return fields; + } + + /** + * Adds member fields apart from the file to the multipart HTTP request. + * + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @return a list of the fields in the request (except for "file"). + */ + private List formatAudioTranscriptionOptions(AudioTranscriptionOptions audioTranscriptionOptions) { + List fields = new ArrayList<>(); + if (audioTranscriptionOptions.getResponseFormat() != null) { + fields.add(new MultipartField("response_format", + audioTranscriptionOptions.getResponseFormat().toString())); + } + if (audioTranscriptionOptions.getModel() != null) { + fields.add(new MultipartField("model", + audioTranscriptionOptions.getModel() + )); + } + if (audioTranscriptionOptions.getPrompt() != null) { + fields.add(new MultipartField("prompt", + audioTranscriptionOptions.getPrompt())); + } + if (audioTranscriptionOptions.getTemperature() != null) { + fields.add(new MultipartField("temperature", + String.valueOf(audioTranscriptionOptions.getTemperature()))); + } + if (audioTranscriptionOptions.getLanguage() != null) { + fields.add(new MultipartField("language", + audioTranscriptionOptions.getLanguage())); + } + return fields; + } + + /** + * This method formats a field for a multipart HTTP request and returns its byte[] representation. + * + * @param field the field of the request to be marshalled. + * @return byte[] representation of a field for a multipart HTTP request. 
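+     * For example, a field with wire name {@code temperature} and value {@code 0.2} is written as a CRLF, the part
+     * separator, a {@code Content-Disposition: form-data; name="temperature"} header, an empty line, and then the
+     * value itself.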
+ */ + private byte[] serializeField(MultipartField field) { + String serialized = CRLF + partSeparator + + CRLF + "Content-Disposition: form-data; name=\"" + + field.getWireName() + "\"" + CRLF + CRLF + + field.getValue(); + + return serialized.getBytes(encoderCharset); + } +} diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/MultipartDataSerializationResult.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/MultipartDataSerializationResult.java new file mode 100644 index 000000000000..1150b879b6b6 --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/MultipartDataSerializationResult.java @@ -0,0 +1,50 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.openai.implementation; + +import com.azure.core.util.BinaryData; + +/** + * This class is used as a stand-in representation of marshalled data to be used in an HTTP multipart request. + */ +public class MultipartDataSerializationResult { + + /** + * Represents the length of the content of this request. The value is to be used for the "Content-Length" header + * of the HTTP request + */ + private final long dataLength; + + /** + * The multipart form data of the request. + */ + private final BinaryData data; + + /** + * Constructor bundling both data and its length + * @param data the multipart form data of the request + * @param contentLength the length of the multipart form data of the request + */ + public MultipartDataSerializationResult(BinaryData data, long contentLength) { + this.dataLength = contentLength; + this.data = data; + } + + /** + * + * @return the result of marshaling a multipart HTTP request + */ + public BinaryData getData() { + return data; + } + + /** + * + * @return the length of a multipart HTTP request data + */ + public long getDataLength() { + return dataLength; + } + +} diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/MultipartField.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/MultipartField.java new file mode 100644 index 000000000000..1ad618b7ceb6 --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/MultipartField.java @@ -0,0 +1,46 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.openai.implementation; + +/** + * A field of a request for a multipart HTTP request. + */ +public class MultipartField { + + /** + * The JSON key name of this field. + */ + private final String wireName; + + /** + * The JSON value of this field. + */ + private final String value; + + /** + * + * @param wireName The JSON key name of this field. + * @param value The JSON value of this field. + */ + public MultipartField(String wireName, String value) { + this.wireName = wireName; + this.value = value; + } + + /** + * + * @return The JSON key name of this field. + */ + public String getWireName() { + return wireName; + } + + /** + * + * @return The JSON value of this field. 
+ */ + public String getValue() { + return value; + } +} diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/NonAzureOpenAIClientImpl.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/NonAzureOpenAIClientImpl.java index 5ecd55ec21b3..8fd0413c128e 100644 --- a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/NonAzureOpenAIClientImpl.java +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/NonAzureOpenAIClientImpl.java @@ -243,6 +243,158 @@ Response generateImageSync( @BodyParam("application/json") BinaryData imageGenerationOptions, RequestOptions requestOptions, Context context); + + @Post("/audio/transcriptions") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Mono> getAudioTranscriptionAsResponseObject( + @HostParam("endpoint") String endpoint, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranscriptionOptions, + RequestOptions requestOptions, + Context context); + + @Post("/audio/transcriptions") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Response getAudioTranscriptionAsResponseObjectSync( + @HostParam("endpoint") String endpoint, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranscriptionOptions, + RequestOptions requestOptions, + Context context); + + @Post("/audio/transcriptions") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Mono> getAudioTranscriptionAsPlainText( + @HostParam("endpoint") String endpoint, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranscriptionOptions, + RequestOptions requestOptions, + Context context); + + @Post("/audio/transcriptions") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Response getAudioTranscriptionAsPlainTextSync( + @HostParam("endpoint") String endpoint, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranscriptionOptions, + RequestOptions requestOptions, + Context context); + + @Post("/audio/translations") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + 
code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Mono> getAudioTranslationAsResponseObject( + @HostParam("endpoint") String endpoint, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranslationOptions, + RequestOptions requestOptions, + Context context); + + @Post("/audio/translations") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Response getAudioTranslationAsResponseObjectSync( + @HostParam("endpoint") String endpoint, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranslationOptions, + RequestOptions requestOptions, + Context context); + + @Post("/audio/translations") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Mono> getAudioTranslationAsPlainText( + @HostParam("endpoint") String endpoint, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranslationOptions, + RequestOptions requestOptions, + Context context); + + @Post("/audio/translations") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Response getAudioTranslationAsPlainTextSync( + @HostParam("endpoint") String endpoint, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranslationOptions, + RequestOptions requestOptions, + Context context); } /** @@ -891,7 +1043,7 @@ public Response generateImageWithResponse( * * @param inputJson JSON submitted by the client * @param modelId The LLM model ID to be injected in the JSON - * @return + * @return an updated version of the JSON with the key "model" and its corresponding value "modelId" added */ private static BinaryData addModelIdJson(BinaryData inputJson, String modelId) throws JsonProcessingException { JsonNode jsonNode = JSON_MAPPER.readTree(inputJson.toString()); @@ -905,4 +1057,446 @@ private static BinaryData addModelIdJson(BinaryData inputJson, String modelId) t return inputJson; } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }
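+     *
+     * A hedged sketch of consuming this asynchronous protocol method. In application code the public
+     * {@code OpenAIAsyncClient} is used instead of this implementation type, and {@code multipartBody} stands in
+     * for the multipart/form-data payload produced by {@code MultipartDataHelper} (names are illustrative):
+     *
+     * {@code
+     * client.getAudioTranscriptionAsResponseObjectWithResponseAsync("whisper-1", multipartBody, requestOptions)
+     *     .map(response -> response.getValue().toObject(AudioTranscription.class))
+     *     .subscribe(transcription -> System.out.println(transcription.getText()));
+     * }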
+ * + * @param modelId Specifies the model name to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response} on + * successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranscriptionAsResponseObjectWithResponseAsync( + String modelId, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return FluxUtil.withContext( + context -> + service.getAudioTranscriptionAsResponseObject( + OPEN_AI_ENDPOINT, + accept, + audioTranscriptionOptions, + requestOptions, + context)); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }
+ * + * @param modelId Specifies the model name to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranscriptionAsResponseObjectWithResponse( + String modelId, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return service.getAudioTranscriptionAsResponseObjectSync( + OPEN_AI_ENDPOINT, + accept, + audioTranscriptionOptions, + requestOptions, + Context.NONE); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * String
+     * }
+ * + * @param modelId Specifies the model name to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response} on + * successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranscriptionAsPlainTextWithResponseAsync( + String modelId, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return FluxUtil.withContext( + context -> + service.getAudioTranscriptionAsPlainText( + OPEN_AI_ENDPOINT, + accept, + audioTranscriptionOptions, + requestOptions, + context)); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * String
+     * }
+ * + * @param modelId Specifies the model name to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranscriptionAsPlainTextWithResponse( + String modelId, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return service.getAudioTranscriptionAsPlainTextSync( + OPEN_AI_ENDPOINT, + accept, + audioTranscriptionOptions, + requestOptions, + Context.NONE); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }
+ * + * @param deploymentOrModelName Specifies the model name to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response} on successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranslationAsResponseObjectWithResponseAsync( + String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return FluxUtil.withContext( + context -> + service.getAudioTranslationAsResponseObject( + OPEN_AI_ENDPOINT, + accept, + audioTranslationOptions, + requestOptions, + context)); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }
+ * + * @param modelId Specifies the model name to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranslationAsResponseObjectWithResponse( + String modelId, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return service.getAudioTranslationAsResponseObjectSync( + OPEN_AI_ENDPOINT, + accept, + audioTranslationOptions, + requestOptions, + Context.NONE); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * String
+     * }
+ * + * @param modelId Specifies the model name to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response} on successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranslationAsPlainTextWithResponseAsync( + String modelId, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return FluxUtil.withContext( + context -> + service.getAudioTranslationAsPlainText( + OPEN_AI_ENDPOINT, + accept, + audioTranslationOptions, + requestOptions, + context)); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

+     * Request Body Schema
+     *
+     * {@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }
+     *
+     * Response Body Schema
+     *
+     * {@code
+     * String
+     * }
+ * + * @param modelId Specifies the model name to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranslationAsPlainTextWithResponse( + String modelId, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return service.getAudioTranslationAsPlainTextSync( + OPEN_AI_ENDPOINT, + accept, + audioTranslationOptions, + requestOptions, + Context.NONE); + } } diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/OpenAIClientImpl.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/OpenAIClientImpl.java index d74f569bcc04..73beb5f1faa2 100644 --- a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/OpenAIClientImpl.java +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/OpenAIClientImpl.java @@ -360,6 +360,182 @@ Response beginAzureBatchImageGenerationSync( @BodyParam("application/json") BinaryData imageGenerationOptions, RequestOptions requestOptions, Context context); + + @Post("/deployments/{deploymentId}/audio/transcriptions") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Mono> getAudioTranscriptionAsPlainText( + @HostParam("endpoint") String endpoint, + @QueryParam("api-version") String apiVersion, + @PathParam("deploymentId") String deploymentOrModelName, + @HeaderParam("accept") String accept, + @BodyParam("application/json") BinaryData audioTranscriptionOptions, + RequestOptions requestOptions, + Context context); + + @Post("/deployments/{deploymentId}/audio/transcriptions") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Response getAudioTranscriptionAsPlainTextSync( + @HostParam("endpoint") String endpoint, + @QueryParam("api-version") String apiVersion, + @PathParam("deploymentId") String deploymentOrModelName, + @HeaderParam("accept") String accept, + @BodyParam("application/json") BinaryData audioTranscriptionOptions, + RequestOptions requestOptions, + Context context); + + // @Multipart not supported by RestProxy + @Post("/deployments/{deploymentId}/audio/transcriptions") + 
@ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Mono> getAudioTranscriptionAsResponseObject( + @HostParam("endpoint") String endpoint, + @QueryParam("api-version") String apiVersion, + @PathParam("deploymentId") String deploymentOrModelName, + @HeaderParam("content-type") String contentType, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranscriptionOptions, + RequestOptions requestOptions, + Context context); + + // @Multipart not supported by RestProxy + @Post("/deployments/{deploymentId}/audio/transcriptions") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Response getAudioTranscriptionAsResponseObjectSync( + @HostParam("endpoint") String endpoint, + @QueryParam("api-version") String apiVersion, + @PathParam("deploymentId") String deploymentOrModelName, + @HeaderParam("content-type") String contentType, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranscriptionOptions, + RequestOptions requestOptions, + Context context); + + @Post("/deployments/{deploymentId}/audio/translations") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Mono> getAudioTranslationAsPlainText( + @HostParam("endpoint") String endpoint, + @QueryParam("api-version") String apiVersion, + @PathParam("deploymentId") String deploymentOrModelName, + @HeaderParam("accept") String accept, + @BodyParam("application/json") BinaryData audioTranslationOptions, + RequestOptions requestOptions, + Context context); + + @Post("/deployments/{deploymentId}/audio/translations") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Response getAudioTranslationAsPlainTextSync( + @HostParam("endpoint") String endpoint, + @QueryParam("api-version") String apiVersion, + @PathParam("deploymentId") String deploymentOrModelName, + @HeaderParam("accept") String accept, + @BodyParam("application/json") BinaryData audioTranslationOptions, + RequestOptions requestOptions, + Context context); + + // @Multipart not supported by RestProxy + @Post("/deployments/{deploymentId}/audio/translations") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + 
@UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Mono> getAudioTranslationAsResponseObject( + @HostParam("endpoint") String endpoint, + @QueryParam("api-version") String apiVersion, + @PathParam("deploymentId") String deploymentOrModelName, + @HeaderParam("content-type") String contentType, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranslationOptions, + RequestOptions requestOptions, + Context context); + + // @Multipart not supported by RestProxy + @Post("/deployments/{deploymentId}/audio/translations") + @ExpectedResponses({200}) + @UnexpectedResponseExceptionType( + value = ClientAuthenticationException.class, + code = {401}) + @UnexpectedResponseExceptionType( + value = ResourceNotFoundException.class, + code = {404}) + @UnexpectedResponseExceptionType( + value = ResourceModifiedException.class, + code = {409}) + @UnexpectedResponseExceptionType(HttpResponseException.class) + Response getAudioTranslationAsResponseObjectSync( + @HostParam("endpoint") String endpoint, + @QueryParam("api-version") String apiVersion, + @PathParam("deploymentId") String deploymentOrModelName, + @HeaderParam("content-type") String contentType, + @HeaderParam("accept") String accept, + @BodyParam("multipart/form-data") BinaryData audioTranslationOptions, + RequestOptions requestOptions, + Context context); } /** @@ -537,6 +713,18 @@ public Response getEmbeddingsWithResponse( * violence (Optional): (recursive schema, see violence above) * hate (Optional): (recursive schema, see hate above) * self_harm (Optional): (recursive schema, see self_harm above) + * error (Optional): { + * code: String (Required) + * message: String (Required) + * target: String (Optional) + * details (Optional): [ + * (recursive schema, see above) + * ] + * innererror (Optional): { + * code: String (Optional) + * innererror (Optional): (recursive schema, see innererror above) + * } + * } * } * } * ] @@ -650,6 +838,18 @@ public Mono> getCompletionsWithResponseAsync( * violence (Optional): (recursive schema, see violence above) * hate (Optional): (recursive schema, see hate above) * self_harm (Optional): (recursive schema, see self_harm above) + * error (Optional): { + * code: String (Required) + * message: String (Required) + * target: String (Optional) + * details (Optional): [ + * (recursive schema, see above) + * ] + * innererror (Optional): { + * code: String (Optional) + * innererror (Optional): (recursive schema, see innererror above) + * } + * } * } * } * ] @@ -800,6 +1000,18 @@ public Response getCompletionsWithResponse( * violence (Optional): (recursive schema, see violence above) * hate (Optional): (recursive schema, see hate above) * self_harm (Optional): (recursive schema, see self_harm above) + * error (Optional): { + * code: String (Required) + * message: String (Required) + * target: String (Optional) + * details (Optional): [ + * (recursive schema, see above) + * ] + * innererror (Optional): { + * code: String (Optional) + * innererror (Optional): (recursive schema, see innererror above) + * } + * } * } * } * ] @@ -935,6 +1147,18 @@ public Mono> getChatCompletionsWithResponseAsync( * violence (Optional): (recursive schema, see violence above) * hate (Optional): (recursive schema, see hate above) * self_harm (Optional): (recursive schema, see self_harm above) + * error 
(Optional): { + * code: String (Required) + * message: String (Required) + * target: String (Optional) + * details (Optional): [ + * (recursive schema, see above) + * ] + * innererror (Optional): { + * code: String (Optional) + * innererror (Optional): (recursive schema, see innererror above) + * } + * } * } * } * ] @@ -1068,6 +1292,18 @@ public Response getChatCompletionsWithResponse( * violence (Optional): (recursive schema, see violence above) * hate (Optional): (recursive schema, see hate above) * self_harm (Optional): (recursive schema, see self_harm above) + * error (Optional): { + * code: String (Required) + * message: String (Required) + * target: String (Optional) + * details (Optional): [ + * (recursive schema, see above) + * ] + * innererror (Optional): { + * code: String (Optional) + * innererror (Optional): (recursive schema, see innererror above) + * } + * } * } * } * ] @@ -1204,6 +1440,18 @@ public Mono> getChatCompletionsWithAzureExtensionsWithRespo * violence (Optional): (recursive schema, see violence above) * hate (Optional): (recursive schema, see hate above) * self_harm (Optional): (recursive schema, see self_harm above) + * error (Optional): { + * code: String (Required) + * message: String (Required) + * target: String (Optional) + * details (Optional): [ + * (recursive schema, see above) + * ] + * innererror (Optional): { + * code: String (Optional) + * innererror (Optional): (recursive schema, see innererror above) + * } + * } * } * } * ] @@ -1509,4 +1757,478 @@ public SyncPoller beginBeginAzureBatchImageGeneration( TypeReference.createInstance(BinaryData.class), TypeReference.createInstance(BinaryData.class)); } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

<p><strong>Request Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }</pre>
+     *
+     * <p><strong>Response Body Schema</strong>
+     *
+     * <pre>{@code
+     * String
+     * }</pre>
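+     *
+     * <p>Illustrative sketch only (not generated code): the options model defined in this change could be populated
+     * as follows before being handed to a convenience overload. The file path and values are placeholders, and the
+     * multipart/form-data encoding of the request body is assumed to be handled elsewhere and is not shown.
+     * <pre>{@code
+     * byte[] audio = BinaryData.fromFile(Paths.get("audio.wav")).toBytes(); // placeholder path, any supported format
+     * AudioTranscriptionOptions options = new AudioTranscriptionOptions(audio)
+     *     .setResponseFormat(AudioTranscriptionFormat.TEXT) // plain-text output, matching this method's response
+     *     .setLanguage("en")                                // optional ISO-639-1 hint for the spoken language
+     *     .setTemperature(0.0);                             // optional sampling temperature
+     * }</pre>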
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response} on + * successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranscriptionAsPlainTextWithResponseAsync( + String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return FluxUtil.withContext( + context -> + service.getAudioTranscriptionAsPlainText( + this.getEndpoint(), + this.getServiceVersion().getVersion(), + deploymentOrModelName, + accept, + audioTranscriptionOptions, + requestOptions, + context)); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

<p><strong>Request Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }</pre>
+     *
+     * <p><strong>Response Body Schema</strong>
+     *
+     * <pre>{@code
+     * String
+     * }</pre>
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranscriptionAsPlainTextWithResponse( + String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return service.getAudioTranscriptionAsPlainTextSync( + this.getEndpoint(), + this.getServiceVersion().getVersion(), + deploymentOrModelName, + accept, + audioTranscriptionOptions, + requestOptions, + Context.NONE); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

<p><strong>Request Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }</pre>
+     *
+     * <p><strong>Response Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }</pre>
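+     *
+     * <p>Illustrative sketch only (not generated code): assuming the verbose JSON response format, the returned
+     * {@code BinaryData} could be deserialized into the {@code AudioTranscription} model defined in this change and
+     * its segments inspected. The {@code response} variable is a placeholder for a {@code Response<BinaryData>}
+     * obtained from this method.
+     * <pre>{@code
+     * AudioTranscription transcription = response.getValue().toObject(AudioTranscription.class);
+     * System.out.println("Text: " + transcription.getText());
+     * System.out.println("Detected language: " + transcription.getLanguage());
+     * if (AudioTaskLabel.TRANSCRIBE.equals(transcription.getTask()) && transcription.getSegments() != null) {
+     *     for (AudioTranscriptionSegment segment : transcription.getSegments()) {
+     *         // start and end are surfaced as java.time.Duration values
+     *         System.out.printf("[%s - %s] %s%n", segment.getStart(), segment.getEnd(), segment.getText());
+     *     }
+     * }
+     * }</pre>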
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response} on + * successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranscriptionAsResponseObjectWithResponseAsync( + String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + final String contentType = "multipart/form-data"; + final String accept = "application/json"; + return FluxUtil.withContext( + context -> + service.getAudioTranscriptionAsResponseObject( + this.getEndpoint(), + this.getServiceVersion().getVersion(), + deploymentOrModelName, + contentType, + accept, + audioTranscriptionOptions, + requestOptions, + context)); + } + + /** + * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the + * written language corresponding to the language it was spoken in. + * + *

<p><strong>Request Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     language: String (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }</pre>
+     *
+     * <p><strong>Response Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }</pre>
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranscriptionOptions The configuration information for an audio transcription request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranscriptionAsResponseObjectWithResponse( + String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) { + final String contentType = "multipart/form-data"; + final String accept = "application/json"; + return service.getAudioTranscriptionAsResponseObjectSync( + this.getEndpoint(), + this.getServiceVersion().getVersion(), + deploymentOrModelName, + contentType, + accept, + audioTranscriptionOptions, + requestOptions, + Context.NONE); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

<p><strong>Request Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }</pre>
+     *
+     * <p><strong>Response Body Schema</strong>
+     *
+     * <pre>{@code
+     * String
+     * }</pre>
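+     *
+     * <p>Illustrative sketch only (not generated code): populating the translation options model defined in this
+     * change. Values are placeholders; the multipart/form-data encoding of the request body is not shown.
+     * <pre>{@code
+     * byte[] audio = BinaryData.fromFile(Paths.get("spoken-audio.wav")).toBytes(); // placeholder path
+     * AudioTranslationOptions options = new AudioTranslationOptions(audio)
+     *     .setResponseFormat(AudioTranscriptionFormat.TEXT) // plain-text output, matching this method's response
+     *     .setPrompt("A short hint written in the audio's spoken language."); // optional style/continuation hint
+     * }</pre>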
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response} on successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranslationAsPlainTextWithResponseAsync( + String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return FluxUtil.withContext( + context -> + service.getAudioTranslationAsPlainText( + this.getEndpoint(), + this.getServiceVersion().getVersion(), + deploymentOrModelName, + accept, + audioTranslationOptions, + requestOptions, + context)); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

<p><strong>Request Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }</pre>
+     *
+     * <p><strong>Response Body Schema</strong>
+     *
+     * <pre>{@code
+     * String
+     * }</pre>
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranslationAsPlainTextWithResponse( + String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + final String accept = "application/json"; + return service.getAudioTranslationAsPlainTextSync( + this.getEndpoint(), + this.getServiceVersion().getVersion(), + deploymentOrModelName, + accept, + audioTranslationOptions, + requestOptions, + Context.NONE); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

<p><strong>Request Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }</pre>
+     *
+     * <p><strong>Response Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }</pre>
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response} on successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> getAudioTranslationAsResponseObjectWithResponseAsync( + String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + final String contentType = "multipart/form-data"; + final String accept = "application/json"; + return FluxUtil.withContext( + context -> + service.getAudioTranslationAsResponseObject( + this.getEndpoint(), + this.getServiceVersion().getVersion(), + deploymentOrModelName, + contentType, + accept, + audioTranslationOptions, + requestOptions, + context)); + } + + /** + * Gets English language transcribed text and associated metadata from provided spoken audio data. + * + *

<p><strong>Request Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     file: byte[] (Required)
+     *     response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+     *     prompt: String (Optional)
+     *     temperature: Double (Optional)
+     *     model: String (Optional)
+     * }
+     * }</pre>
+     *
+     * <p><strong>Response Body Schema</strong>
+     *
+     * <pre>{@code
+     * {
+     *     text: String (Required)
+     *     task: String(transcribe/translate) (Optional)
+     *     language: String (Optional)
+     *     duration: Double (Optional)
+     *     segments (Optional): [
+     *          (Optional){
+     *             id: int (Required)
+     *             start: double (Required)
+     *             end: double (Required)
+     *             text: String (Required)
+     *             temperature: double (Required)
+     *             avg_logprob: double (Required)
+     *             compression_ratio: double (Required)
+     *             no_speech_prob: double (Required)
+     *             tokens (Required): [
+     *                 int (Required)
+     *             ]
+     *             seek: int (Required)
+     *         }
+     *     ]
+     * }
+     * }</pre>
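+     *
+     * <p>Illustrative sketch only (not generated code): the wire format reports timings as fractional seconds, while
+     * the models defined in this change surface them as {@code java.time.Duration}. Here {@code translation} is a
+     * placeholder for a deserialized {@code AudioTranscription} result.
+     * <pre>{@code
+     * Duration total = translation.getDuration();               // e.g. 2.5 seconds on the wire becomes PT2.5S
+     * if (translation.getSegments() != null) {                  // segments are only present for verbose output
+     *     for (AudioTranscriptionSegment segment : translation.getSegments()) {
+     *         long startMillis = segment.getStart().toMillis(); // fractional seconds, converted via nanoseconds
+     *         int seekSeconds = segment.getSeek() / 100;        // seek positions are hundredths of seconds
+     *     }
+     * }
+     * }</pre>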
+ * + * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name + * (when using non-Azure OpenAI) to use for this request. + * @param audioTranslationOptions The configuration information for an audio translation request. + * @param requestOptions The options to configure the HTTP request before HTTP client sends it. + * @throws HttpResponseException thrown if the request is rejected by server. + * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401. + * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404. + * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409. + * @return english language transcribed text and associated metadata from provided spoken audio data along with + * {@link Response}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Response getAudioTranslationAsResponseObjectWithResponse( + String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) { + final String contentType = "multipart/form-data"; + final String accept = "application/json"; + return service.getAudioTranslationAsResponseObjectSync( + this.getEndpoint(), + this.getServiceVersion().getVersion(), + deploymentOrModelName, + contentType, + accept, + audioTranslationOptions, + requestOptions, + Context.NONE); + } } diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTaskLabel.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTaskLabel.java new file mode 100644 index 000000000000..36f8361ad2a4 --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTaskLabel.java @@ -0,0 +1,50 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// Code generated by Microsoft (R) AutoRest Code Generator. +package com.azure.ai.openai.models; + +import com.azure.core.annotation.Generated; +import com.azure.core.util.ExpandableStringEnum; +import com.fasterxml.jackson.annotation.JsonCreator; +import java.util.Collection; + +/** Defines the possible descriptors for available audio operation responses. */ +public final class AudioTaskLabel extends ExpandableStringEnum { + + /** Accompanying response data resulted from an audio transcription task. */ + @Generated public static final AudioTaskLabel TRANSCRIBE = fromString("transcribe"); + + /** Accompanying response data resulted from an audio translation task. */ + @Generated public static final AudioTaskLabel TRANSLATE = fromString("translate"); + + /** + * Creates a new instance of AudioTaskLabel value. + * + * @deprecated Use the {@link #fromString(String)} factory method. + */ + @Generated + @Deprecated + public AudioTaskLabel() {} + + /** + * Creates or finds a AudioTaskLabel from its string representation. + * + * @param name a name to look for. + * @return the corresponding AudioTaskLabel. + */ + @Generated + @JsonCreator + public static AudioTaskLabel fromString(String name) { + return fromString(name, AudioTaskLabel.class); + } + + /** + * Gets known AudioTaskLabel values. + * + * @return known AudioTaskLabel values. 
+ */ + @Generated + public static Collection values() { + return values(AudioTaskLabel.class); + } +} diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscription.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscription.java new file mode 100644 index 000000000000..8d7b085ce8af --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscription.java @@ -0,0 +1,119 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// Code generated by Microsoft (R) AutoRest Code Generator. +package com.azure.ai.openai.models; + +import com.azure.core.annotation.Generated; +import com.azure.core.annotation.Immutable; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.time.Duration; +import java.util.List; + +/** Result information for an operation that transcribed spoken audio into written text. */ +@Immutable +public final class AudioTranscription { + + /* + * The transcribed text for the provided audio data. + */ + @Generated + @JsonProperty(value = "text") + private String text; + + /* + * The label that describes which operation type generated the accompanying response data. + */ + @Generated + @JsonProperty(value = "task") + private AudioTaskLabel task; + + /* + * The spoken language that was detected in the transcribed audio data. + * This is expressed as a two-letter ISO-639-1 language code like 'en' or 'fr'. + */ + @Generated + @JsonProperty(value = "language") + private String language; + + /* + * The total duration of the audio processed to produce accompanying transcription information. + */ + @Generated + @JsonProperty(value = "duration") + private Double duration; + + /* + * A collection of information about the timing, probabilities, and other detail of each processed audio segment. + */ + @Generated + @JsonProperty(value = "segments") + private List segments; + + /** + * Creates an instance of AudioTranscription class. + * + * @param text the text value to set. + */ + @Generated + @JsonCreator + private AudioTranscription(@JsonProperty(value = "text") String text) { + this.text = text; + } + + /** + * Get the text property: The transcribed text for the provided audio data. + * + * @return the text value. + */ + @Generated + public String getText() { + return this.text; + } + + /** + * Get the task property: The label that describes which operation type generated the accompanying response data. + * + * @return the task value. + */ + @Generated + public AudioTaskLabel getTask() { + return this.task; + } + + /** + * Get the language property: The spoken language that was detected in the transcribed audio data. This is expressed + * as a two-letter ISO-639-1 language code like 'en' or 'fr'. + * + * @return the language value. + */ + @Generated + public String getLanguage() { + return this.language; + } + + /** + * Get the duration property: The total duration of the audio processed to produce accompanying transcription + * information. + * + * @return the duration value. + */ + @Generated + public Duration getDuration() { + if (this.duration == null) { + return null; + } + return Duration.ofNanos((long) (this.duration * 1000_000_000L)); + } + + /** + * Get the segments property: A collection of information about the timing, probabilities, and other detail of each + * processed audio segment. + * + * @return the segments value. 
+ */ + @Generated + public List getSegments() { + return this.segments; + } +} diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionFormat.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionFormat.java new file mode 100644 index 000000000000..8429c748e7ca --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionFormat.java @@ -0,0 +1,65 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// Code generated by Microsoft (R) AutoRest Code Generator. +package com.azure.ai.openai.models; + +import com.azure.core.annotation.Generated; +import com.azure.core.util.ExpandableStringEnum; +import com.fasterxml.jackson.annotation.JsonCreator; +import java.util.Collection; + +/** Defines available options for the underlying response format of output transcription information. */ +public final class AudioTranscriptionFormat extends ExpandableStringEnum { + + /** Use a response body that is a JSON object containing a single 'text' field for the transcription. */ + @Generated public static final AudioTranscriptionFormat JSON = fromString("json"); + + /** + * Use a response body that is a JSON object containing transcription text along with timing, segments, and other + * metadata. + */ + @Generated public static final AudioTranscriptionFormat VERBOSE_JSON = fromString("verbose_json"); + + /** Use a response body that is plain text containing the raw, unannotated transcription. */ + @Generated public static final AudioTranscriptionFormat TEXT = fromString("text"); + + /** Use a response body that is plain text in SubRip (SRT) format that also includes timing information. */ + @Generated public static final AudioTranscriptionFormat SRT = fromString("srt"); + + /** + * Use a response body that is plain text in Web Video Text Tracks (VTT) format that also includes timing + * information. + */ + @Generated public static final AudioTranscriptionFormat VTT = fromString("vtt"); + + /** + * Creates a new instance of AudioTranscriptionFormat value. + * + * @deprecated Use the {@link #fromString(String)} factory method. + */ + @Generated + @Deprecated + public AudioTranscriptionFormat() {} + + /** + * Creates or finds a AudioTranscriptionFormat from its string representation. + * + * @param name a name to look for. + * @return the corresponding AudioTranscriptionFormat. + */ + @Generated + @JsonCreator + public static AudioTranscriptionFormat fromString(String name) { + return fromString(name, AudioTranscriptionFormat.class); + } + + /** + * Gets known AudioTranscriptionFormat values. + * + * @return known AudioTranscriptionFormat values. + */ + @Generated + public static Collection values() { + return values(AudioTranscriptionFormat.class); + } +} diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionOptions.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionOptions.java new file mode 100644 index 000000000000..7d72fd5ea891 --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionOptions.java @@ -0,0 +1,211 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// Code generated by Microsoft (R) AutoRest Code Generator. 
+package com.azure.ai.openai.models; + +import com.azure.core.annotation.Fluent; +import com.azure.core.annotation.Generated; +import com.azure.core.util.CoreUtils; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +/** The configuration information for an audio transcription request. */ +@Fluent +public final class AudioTranscriptionOptions { + + /* + * The audio data to transcribe. This must be the binary content of a file in one of the supported media formats: + * flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm. + */ + @Generated + @JsonProperty(value = "file") + private byte[] file; + + /* + * The requested format of the transcription response data, which will influence the content and detail of the + * result. + */ + @Generated + @JsonProperty(value = "response_format") + private AudioTranscriptionFormat responseFormat; + + /* + * The primary spoken language of the audio data to be transcribed, supplied as a two-letter ISO-639-1 language + * code + * such as 'en' or 'fr'. + * Providing this known input language is optional but may improve the accuracy and/or latency of transcription. + */ + @Generated + @JsonProperty(value = "language") + private String language; + + /* + * An optional hint to guide the model's style or continue from a prior audio segment. The written language of the + * prompt should match the primary spoken language of the audio data. + */ + @Generated + @JsonProperty(value = "prompt") + private String prompt; + + /* + * The sampling temperature, between 0 and 1. + * Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused + * and deterministic. + * If set to 0, the model will use log probability to automatically increase the temperature until certain + * thresholds are hit. + */ + @Generated + @JsonProperty(value = "temperature") + private Double temperature; + + /* + * The model to use for this transcription request. + */ + @Generated + @JsonProperty(value = "model") + private String model; + + /** + * Creates an instance of AudioTranscriptionOptions class. + * + * @param file the file value to set. + */ + @Generated + @JsonCreator + public AudioTranscriptionOptions(@JsonProperty(value = "file") byte[] file) { + this.file = file; + } + + /** + * Get the file property: The audio data to transcribe. This must be the binary content of a file in one of the + * supported media formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm. + * + * @return the file value. + */ + @Generated + public byte[] getFile() { + return CoreUtils.clone(this.file); + } + + /** + * Get the responseFormat property: The requested format of the transcription response data, which will influence + * the content and detail of the result. + * + * @return the responseFormat value. + */ + @Generated + public AudioTranscriptionFormat getResponseFormat() { + return this.responseFormat; + } + + /** + * Set the responseFormat property: The requested format of the transcription response data, which will influence + * the content and detail of the result. + * + * @param responseFormat the responseFormat value to set. + * @return the AudioTranscriptionOptions object itself. 
+ */ + @Generated + public AudioTranscriptionOptions setResponseFormat(AudioTranscriptionFormat responseFormat) { + this.responseFormat = responseFormat; + return this; + } + + /** + * Get the language property: The primary spoken language of the audio data to be transcribed, supplied as a + * two-letter ISO-639-1 language code such as 'en' or 'fr'. Providing this known input language is optional but may + * improve the accuracy and/or latency of transcription. + * + * @return the language value. + */ + @Generated + public String getLanguage() { + return this.language; + } + + /** + * Set the language property: The primary spoken language of the audio data to be transcribed, supplied as a + * two-letter ISO-639-1 language code such as 'en' or 'fr'. Providing this known input language is optional but may + * improve the accuracy and/or latency of transcription. + * + * @param language the language value to set. + * @return the AudioTranscriptionOptions object itself. + */ + @Generated + public AudioTranscriptionOptions setLanguage(String language) { + this.language = language; + return this; + } + + /** + * Get the prompt property: An optional hint to guide the model's style or continue from a prior audio segment. The + * written language of the prompt should match the primary spoken language of the audio data. + * + * @return the prompt value. + */ + @Generated + public String getPrompt() { + return this.prompt; + } + + /** + * Set the prompt property: An optional hint to guide the model's style or continue from a prior audio segment. The + * written language of the prompt should match the primary spoken language of the audio data. + * + * @param prompt the prompt value to set. + * @return the AudioTranscriptionOptions object itself. + */ + @Generated + public AudioTranscriptionOptions setPrompt(String prompt) { + this.prompt = prompt; + return this; + } + + /** + * Get the temperature property: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + * output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the + * model will use log probability to automatically increase the temperature until certain thresholds are hit. + * + * @return the temperature value. + */ + @Generated + public Double getTemperature() { + return this.temperature; + } + + /** + * Set the temperature property: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + * output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the + * model will use log probability to automatically increase the temperature until certain thresholds are hit. + * + * @param temperature the temperature value to set. + * @return the AudioTranscriptionOptions object itself. + */ + @Generated + public AudioTranscriptionOptions setTemperature(Double temperature) { + this.temperature = temperature; + return this; + } + + /** + * Get the model property: The model to use for this transcription request. + * + * @return the model value. + */ + @Generated + public String getModel() { + return this.model; + } + + /** + * Set the model property: The model to use for this transcription request. + * + * @param model the model value to set. + * @return the AudioTranscriptionOptions object itself. 
+ */ + @Generated + public AudioTranscriptionOptions setModel(String model) { + this.model = model; + return this; + } +} diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionSegment.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionSegment.java new file mode 100644 index 000000000000..87e289da3b0e --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionSegment.java @@ -0,0 +1,262 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// Code generated by Microsoft (R) AutoRest Code Generator. +package com.azure.ai.openai.models; + +import com.azure.core.annotation.Generated; +import com.azure.core.annotation.Immutable; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.time.Duration; +import java.util.List; + +/** + * Extended information about a single segment of transcribed audio data. Segments generally represent roughly 5-10 + * seconds of speech. Segment boundaries typically occur between words but not necessarily sentences. + */ +@Immutable +public final class AudioTranscriptionSegment { + + /* + * The 0-based index of this segment within a transcription. + */ + @Generated + @JsonProperty(value = "id") + private int id; + + /* + * The time at which this segment started relative to the beginning of the transcribed audio. + */ + @Generated + @JsonProperty(value = "start") + private double start; + + /* + * The time at which this segment ended relative to the beginning of the transcribed audio. + */ + @Generated + @JsonProperty(value = "end") + private double end; + + /* + * The transcribed text that was part of this audio segment. + */ + @Generated + @JsonProperty(value = "text") + private String text; + + /* + * The temperature score associated with this audio segment. + */ + @Generated + @JsonProperty(value = "temperature") + private double temperature; + + /* + * The average log probability associated with this audio segment. + */ + @Generated + @JsonProperty(value = "avg_logprob") + private double avgLogprob; + + /* + * The compression ratio of this audio segment. + */ + @Generated + @JsonProperty(value = "compression_ratio") + private double compressionRatio; + + /* + * The probability of no speech detection within this audio segment. + */ + @Generated + @JsonProperty(value = "no_speech_prob") + private double noSpeechProb; + + /* + * The token IDs matching the transcribed text in this audio segment. + */ + @Generated + @JsonProperty(value = "tokens") + private List tokens; + + /* + * The seek position associated with the processing of this audio segment. + * Seek positions are expressed as hundredths of seconds. + * The model may process several segments from a single seek position, so while the seek position will never + * represent + * a later time than the segment's start, the segment's start may represent a significantly later time than the + * segment's associated seek position. + */ + @Generated + @JsonProperty(value = "seek") + private int seek; + + /** + * Creates an instance of AudioTranscriptionSegment class. + * + * @param id the id value to set. + * @param start the start value to set. + * @param end the end value to set. + * @param text the text value to set. + * @param temperature the temperature value to set. + * @param avgLogprob the avgLogprob value to set. 
+ * @param compressionRatio the compressionRatio value to set. + * @param noSpeechProb the noSpeechProb value to set. + * @param tokens the tokens value to set. + * @param seek the seek value to set. + */ + @Generated + private AudioTranscriptionSegment( + int id, + Duration start, + Duration end, + String text, + double temperature, + double avgLogprob, + double compressionRatio, + double noSpeechProb, + List tokens, + int seek) { + this.id = id; + this.start = (double) start.toNanos() / 1000_000_000L; + this.end = (double) end.toNanos() / 1000_000_000L; + this.text = text; + this.temperature = temperature; + this.avgLogprob = avgLogprob; + this.compressionRatio = compressionRatio; + this.noSpeechProb = noSpeechProb; + this.tokens = tokens; + this.seek = seek; + } + + @Generated + @JsonCreator + private AudioTranscriptionSegment( + @JsonProperty(value = "id") int id, + @JsonProperty(value = "start") double start, + @JsonProperty(value = "end") double end, + @JsonProperty(value = "text") String text, + @JsonProperty(value = "temperature") double temperature, + @JsonProperty(value = "avg_logprob") double avgLogprob, + @JsonProperty(value = "compression_ratio") double compressionRatio, + @JsonProperty(value = "no_speech_prob") double noSpeechProb, + @JsonProperty(value = "tokens") List tokens, + @JsonProperty(value = "seek") int seek) { + this( + id, + Duration.ofNanos((long) (start * 1000_000_000L)), + Duration.ofNanos((long) (end * 1000_000_000L)), + text, + temperature, + avgLogprob, + compressionRatio, + noSpeechProb, + tokens, + seek); + } + + /** + * Get the id property: The 0-based index of this segment within a transcription. + * + * @return the id value. + */ + @Generated + public int getId() { + return this.id; + } + + /** + * Get the start property: The time at which this segment started relative to the beginning of the transcribed + * audio. + * + * @return the start value. + */ + @Generated + public Duration getStart() { + return Duration.ofNanos((long) (this.start * 1000_000_000L)); + } + + /** + * Get the end property: The time at which this segment ended relative to the beginning of the transcribed audio. + * + * @return the end value. + */ + @Generated + public Duration getEnd() { + return Duration.ofNanos((long) (this.end * 1000_000_000L)); + } + + /** + * Get the text property: The transcribed text that was part of this audio segment. + * + * @return the text value. + */ + @Generated + public String getText() { + return this.text; + } + + /** + * Get the temperature property: The temperature score associated with this audio segment. + * + * @return the temperature value. + */ + @Generated + public double getTemperature() { + return this.temperature; + } + + /** + * Get the avgLogprob property: The average log probability associated with this audio segment. + * + * @return the avgLogprob value. + */ + @Generated + public double getAvgLogprob() { + return this.avgLogprob; + } + + /** + * Get the compressionRatio property: The compression ratio of this audio segment. + * + * @return the compressionRatio value. + */ + @Generated + public double getCompressionRatio() { + return this.compressionRatio; + } + + /** + * Get the noSpeechProb property: The probability of no speech detection within this audio segment. + * + * @return the noSpeechProb value. + */ + @Generated + public double getNoSpeechProb() { + return this.noSpeechProb; + } + + /** + * Get the tokens property: The token IDs matching the transcribed text in this audio segment. + * + * @return the tokens value. 
+ */ + @Generated + public List getTokens() { + return this.tokens; + } + + /** + * Get the seek property: The seek position associated with the processing of this audio segment. Seek positions are + * expressed as hundredths of seconds. The model may process several segments from a single seek position, so while + * the seek position will never represent a later time than the segment's start, the segment's start may represent a + * significantly later time than the segment's associated seek position. + * + * @return the seek value. + */ + @Generated + public int getSeek() { + return this.seek; + } +} diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationOptions.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationOptions.java new file mode 100644 index 000000000000..65f7b1f873ad --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationOptions.java @@ -0,0 +1,175 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// Code generated by Microsoft (R) AutoRest Code Generator. +package com.azure.ai.openai.models; + +import com.azure.core.annotation.Fluent; +import com.azure.core.annotation.Generated; +import com.azure.core.util.CoreUtils; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +/** The configuration information for an audio translation request. */ +@Fluent +public final class AudioTranslationOptions { + + /* + * The audio data to transcribe. This must be the binary content of a file in one of the supported media formats: + * flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm. + */ + @Generated + @JsonProperty(value = "file") + private byte[] file; + + /* + * The requested format of the transcription response data, which will influence the content and detail of the + * result. + */ + @Generated + @JsonProperty(value = "response_format") + private AudioTranscriptionFormat responseFormat; + + /* + * An optional hint to guide the model's style or continue from a prior audio segment. The written language of the + * prompt should match the primary spoken language of the audio data. + */ + @Generated + @JsonProperty(value = "prompt") + private String prompt; + + /* + * The sampling temperature, between 0 and 1. + * Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused + * and deterministic. + * If set to 0, the model will use log probability to automatically increase the temperature until certain + * thresholds are hit. + */ + @Generated + @JsonProperty(value = "temperature") + private Double temperature; + + /* + * The model to use for this transcription request. + */ + @Generated + @JsonProperty(value = "model") + private String model; + + /** + * Creates an instance of AudioTranslationOptions class. + * + * @param file the file value to set. + */ + @Generated + @JsonCreator + public AudioTranslationOptions(@JsonProperty(value = "file") byte[] file) { + this.file = file; + } + + /** + * Get the file property: The audio data to transcribe. This must be the binary content of a file in one of the + * supported media formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm. + * + * @return the file value. 
+ */ + @Generated + public byte[] getFile() { + return CoreUtils.clone(this.file); + } + + /** + * Get the responseFormat property: The requested format of the transcription response data, which will influence + * the content and detail of the result. + * + * @return the responseFormat value. + */ + @Generated + public AudioTranscriptionFormat getResponseFormat() { + return this.responseFormat; + } + + /** + * Set the responseFormat property: The requested format of the transcription response data, which will influence + * the content and detail of the result. + * + * @param responseFormat the responseFormat value to set. + * @return the AudioTranslationOptions object itself. + */ + @Generated + public AudioTranslationOptions setResponseFormat(AudioTranscriptionFormat responseFormat) { + this.responseFormat = responseFormat; + return this; + } + + /** + * Get the prompt property: An optional hint to guide the model's style or continue from a prior audio segment. The + * written language of the prompt should match the primary spoken language of the audio data. + * + * @return the prompt value. + */ + @Generated + public String getPrompt() { + return this.prompt; + } + + /** + * Set the prompt property: An optional hint to guide the model's style or continue from a prior audio segment. The + * written language of the prompt should match the primary spoken language of the audio data. + * + * @param prompt the prompt value to set. + * @return the AudioTranslationOptions object itself. + */ + @Generated + public AudioTranslationOptions setPrompt(String prompt) { + this.prompt = prompt; + return this; + } + + /** + * Get the temperature property: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + * output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the + * model will use log probability to automatically increase the temperature until certain thresholds are hit. + * + * @return the temperature value. + */ + @Generated + public Double getTemperature() { + return this.temperature; + } + + /** + * Set the temperature property: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + * output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the + * model will use log probability to automatically increase the temperature until certain thresholds are hit. + * + * @param temperature the temperature value to set. + * @return the AudioTranslationOptions object itself. + */ + @Generated + public AudioTranslationOptions setTemperature(Double temperature) { + this.temperature = temperature; + return this; + } + + /** + * Get the model property: The model to use for this transcription request. + * + * @return the model value. + */ + @Generated + public String getModel() { + return this.model; + } + + /** + * Set the model property: The model to use for this transcription request. + * + * @param model the model value to set. + * @return the AudioTranslationOptions object itself. 
+ */ + @Generated + public AudioTranslationOptions setModel(String model) { + this.model = model; + return this; + } +} diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/ContentFilterResults.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/ContentFilterResults.java index 65883af4465f..2c1c3c668bd3 100644 --- a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/ContentFilterResults.java +++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/ContentFilterResults.java @@ -5,6 +5,7 @@ import com.azure.core.annotation.Generated; import com.azure.core.annotation.Immutable; +import com.azure.core.models.ResponseError; import com.fasterxml.jackson.annotation.JsonProperty; /** Information about the content filtering category, if it has been detected. */ @@ -98,4 +99,23 @@ public ContentFilterResult getSelfHarm() { /** Creates an instance of ContentFilterResults class. */ @Generated private ContentFilterResults() {} + + /* + * Describes an error returned if the content filtering system is + * down or otherwise unable to complete the operation in time. + */ + @Generated + @JsonProperty(value = "error") + private ResponseError error; + + /** + * Get the error property: Describes an error returned if the content filtering system is down or otherwise unable + * to complete the operation in time. + * + * @return the error value. + */ + @Generated + public ResponseError getError() { + return this.error; + } } diff --git a/sdk/openai/azure-ai-openai/src/main/java/module-info.java b/sdk/openai/azure-ai-openai/src/main/java/module-info.java index 016c2a1fc8be..c8eafa553ff1 100644 --- a/sdk/openai/azure-ai-openai/src/main/java/module-info.java +++ b/sdk/openai/azure-ai-openai/src/main/java/module-info.java @@ -7,7 +7,6 @@ exports com.azure.ai.openai; exports com.azure.ai.openai.models; - exports com.azure.ai.openai.implementation.models; opens com.azure.ai.openai.models to com.azure.core, diff --git a/sdk/openai/azure-ai-openai/src/samples/README.md b/sdk/openai/azure-ai-openai/src/samples/README.md index cf37cf05b527..fa5a898c27eb 100644 --- a/sdk/openai/azure-ai-openai/src/samples/README.md +++ b/sdk/openai/azure-ai-openai/src/samples/README.md @@ -28,12 +28,16 @@ Synchronous: - [Chat Completions][sample_get_chat_completions] - [Embeddings][sample_get_embedding] - [Image Generation][sample_image_generation] +- [Audio Transcription][sample_audio_transcription] +- [Audio Translation][sample_audio_translation] Asynchronous: - [Text Completions][async_sample_get_completions] - [Chat Completions][async_sample_get_chat_completions] - [Embeddings][async_sample_get_embedding] - [Image Generation][async_sample_image_generation] +- [Audio Transcription][async_sample_audio_transcription] +- [Audio Translation][async_sample_audio_translation] Cookbook: - [Chat bot][cookbook_chat_bot] @@ -66,11 +70,15 @@ This project welcomes contributions and suggestions. 
Find [more contributing][SD [async_sample_get_chat_completions]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetChatCompletionsAsyncSample.java [async_sample_get_embedding]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetEmbeddingsAsyncSample.java [async_sample_image_generation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetImagesAsyncSample.java +[async_sample_audio_transcription]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionAsyncSample.java +[async_sample_audio_translation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationAsyncSample.java [sample_get_completions]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetCompletionsSample.java [sample_get_chat_completions]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetChatCompletionsSample.java [sample_get_embedding]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetEmbeddingsSample.java [sample_image_generation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetImagesSample.java +[sample_audio_transcription]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java +[sample_audio_translation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java [cookbook_chat_bot]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatbotSample.java [cookbook_chat_bot_with_key]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatbotWithKeySample.java diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatCompletionsWithYourData.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatCompletionsWithYourData.java index 0d732704c90c..fecaa9dccf77 100644 --- a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatCompletionsWithYourData.java +++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatCompletionsWithYourData.java @@ -29,7 +29,7 @@ public class ChatCompletionsWithYourData { * * @param args Unused. Arguments to the program. 
*/ - public static void main(String[] args){ + public static void main(String[] args) { String azureOpenaiKey = "{azure-open-ai-key}"; String endpoint = "{azure-open-ai-endpoint}"; String deploymentOrModelId = "{azure-open-ai-deployment-model-id}"; diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/impl/ReadmeSamples.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/impl/ReadmeSamples.java index 3384e3cb3e2f..7488e04c3271 100644 --- a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/impl/ReadmeSamples.java +++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/impl/ReadmeSamples.java @@ -6,6 +6,10 @@ import com.azure.ai.openai.OpenAIAsyncClient; import com.azure.ai.openai.OpenAIClient; import com.azure.ai.openai.OpenAIClientBuilder; +import com.azure.ai.openai.models.AudioTranscription; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.ai.openai.models.AudioTranslationOptions; import com.azure.ai.openai.models.ChatChoice; import com.azure.ai.openai.models.ChatCompletions; import com.azure.ai.openai.models.ChatCompletionsOptions; @@ -25,11 +29,14 @@ import com.azure.core.credential.TokenCredential; import com.azure.core.http.ProxyOptions; import com.azure.core.models.ResponseError; +import com.azure.core.util.BinaryData; import com.azure.core.util.HttpClientOptions; import com.azure.core.util.IterableStream; import com.azure.identity.DefaultAzureCredentialBuilder; import java.net.InetSocketAddress; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -221,4 +228,34 @@ public void imageGeneration() { } // END: readme-sample-imageGeneration } + + public void audioTranscription() { + // BEGIN: readme-sample-audioTranscription + String fileName = "{your-file-name}"; + Path filePath = Paths.get("{your-file-path}" + fileName); + + byte[] file = BinaryData.fromFile(filePath).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription transcription = client.getAudioTranscription("{deploymentOrModelId}", fileName, transcriptionOptions); + + System.out.println("Transcription: " + transcription.getText()); + // END: readme-sample-audioTranscription + } + + public void audioTranslation() { + // BEGIN: readme-sample-audioTranslation + String fileName = "{your-file-name}"; + Path filePath = Paths.get("{your-file-path}" + fileName); + + byte[] file = BinaryData.fromFile(filePath).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription translation = client.getAudioTranslation("{deploymentOrModelId}", fileName, translationOptions); + + System.out.println("Translation: " + translation.getText()); + // END: readme-sample-audioTranslation + } } diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/JP_it_is_rainy_today.wav b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/JP_it_is_rainy_today.wav new file mode 100644 index 000000000000..5970c85ec1cd Binary files /dev/null and b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/JP_it_is_rainy_today.wav differ diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/batman.wav 
b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/batman.wav new file mode 100644 index 000000000000..4c0b7248a39c Binary files /dev/null and b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/batman.wav differ diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionAsyncSample.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionAsyncSample.java new file mode 100644 index 000000000000..fbebd49b5965 --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionAsyncSample.java @@ -0,0 +1,52 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.openai.usage; + +import com.azure.ai.openai.OpenAIAsyncClient; +import com.azure.ai.openai.OpenAIClientBuilder; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.core.credential.AzureKeyCredential; +import com.azure.core.util.BinaryData; + +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.concurrent.TimeUnit; + +/** + * An asynchronous sample demonstrates how to transcribe a given audio file. + */ +public class AudioTranscriptionAsyncSample { + /** + * Runs the sample algorithm and demonstrates how to transcribe a given audio file. + * + * @param args Unused. Arguments to the program. + */ + public static void main(String[] args) throws InterruptedException { + String azureOpenaiKey = "{azure-open-ai-key}"; + String endpoint = "{azure-open-ai-endpoint}"; + String deploymentOrModelId = "{azure-open-ai-deployment-model-id}"; + String fileName = "batman.wav"; + Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName); + + OpenAIAsyncClient client = new OpenAIClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(azureOpenaiKey)) + .buildAsyncClient(); + + byte[] file = BinaryData.fromFile(filePath).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + + client.getAudioTranscription(deploymentOrModelId, fileName, transcriptionOptions) + .subscribe(transcription -> { + System.out.println("Transcription: " + transcription.getText()); + }); + + // The .subscribe() creation and assignment is not a blocking call. For the purpose of this example, we sleep + // the thread so the program does not end before the transcription operation is complete. Using .block() instead of + // .subscribe() will turn this into a synchronous call. + TimeUnit.SECONDS.sleep(10); + } +} diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java new file mode 100644 index 000000000000..e16238116533 --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java @@ -0,0 +1,46 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License.
+ +package com.azure.ai.openai.usage; + +import com.azure.ai.openai.OpenAIClient; +import com.azure.ai.openai.OpenAIClientBuilder; +import com.azure.ai.openai.models.AudioTranscription; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.core.credential.AzureKeyCredential; +import com.azure.core.util.BinaryData; + +import java.nio.file.Path; +import java.nio.file.Paths; + +/** + * A sample demonstrates how to transcribe a given audio file. + */ +public class AudioTranscriptionSample { + /** + * Runs the sample algorithm and demonstrates how to transcribe a given audio file. + * + * @param args Unused. Arguments to the program. + */ + public static void main(String[] args) { + String azureOpenaiKey = "{azure-open-ai-key}"; + String endpoint = "{azure-open-ai-endpoint}"; + String deploymentOrModelId = "{azure-open-ai-deployment-model-id}"; + String fileName = "batman.wav"; + Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName); + + OpenAIClient client = new OpenAIClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(azureOpenaiKey)) + .buildClient(); + + byte[] file = BinaryData.fromFile(filePath).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription transcription = client.getAudioTranscription(deploymentOrModelId, fileName, transcriptionOptions); + + System.out.println("Transcription: " + transcription.getText()); + } +} diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationAsyncSample.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationAsyncSample.java new file mode 100644 index 000000000000..4ba19ad37b7f --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationAsyncSample.java @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.openai.usage; + +import com.azure.ai.openai.OpenAIAsyncClient; +import com.azure.ai.openai.OpenAIClientBuilder; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranslationOptions; +import com.azure.core.credential.AzureKeyCredential; +import com.azure.core.util.BinaryData; + +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.concurrent.TimeUnit; + +/** + * An asynchronous sample demonstrates how to translate a given audio file. + */ +public class AudioTranslationAsyncSample { + /** + * Runs the sample algorithm and demonstrates how to translate a given audio file. + * + * @param args Unused. Arguments to the program.
+ */ + public static void main(String[] args) throws InterruptedException { + String azureOpenaiKey = "{azure-open-ai-key}"; + String endpoint = "{azure-open-ai-endpoint}"; + String deploymentOrModelId = "{azure-open-ai-deployment-model-id}"; + String fileName = "JP_it_is_rainy_today.wav"; + Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName); + + OpenAIAsyncClient client = new OpenAIClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(azureOpenaiKey)) + .buildAsyncClient(); + byte[] file = BinaryData.fromFile(filePath).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + + client.getAudioTranslation(deploymentOrModelId, fileName, translationOptions) + .subscribe(translation -> { + System.out.println("Translation: " + translation.getText()); + }); + + // The .subscribe() creation and assignment is not a blocking call. For the purpose of this example, we sleep + // the thread so the program does not end before the translation operation is complete. Using .block() instead of + // .subscribe() will turn this into a synchronous call. + TimeUnit.SECONDS.sleep(10); + } +} diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java new file mode 100644 index 000000000000..18a56d967fef --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.openai.usage; + +import com.azure.ai.openai.OpenAIClient; +import com.azure.ai.openai.OpenAIClientBuilder; +import com.azure.ai.openai.models.AudioTranscription; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranslationOptions; +import com.azure.core.credential.AzureKeyCredential; +import com.azure.core.util.BinaryData; + +import java.nio.file.Path; +import java.nio.file.Paths; + +/** + * A sample demonstrates how to translate a given audio file. + */ +public class AudioTranslationSample { + /** + * Runs the sample algorithm and demonstrates how to translate a given audio file. + * + * @param args Unused. Arguments to the program.
+ */ + public static void main(String[] args) { + String azureOpenaiKey = "{azure-open-ai-key}"; + String endpoint = "{azure-open-ai-endpoint}"; + String deploymentOrModelId = "{azure-open-ai-deployment-model-id}"; + String fileName = "JP_it_is_rainy_today.wav"; + Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName); + + OpenAIClient client = new OpenAIClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(azureOpenaiKey)) + .buildClient(); + byte[] file = BinaryData.fromFile(filePath).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file) + .setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription translation = client.getAudioTranslation(deploymentOrModelId, fileName, translationOptions); + + System.out.println("Translation: " + translation.getText()); + } +} diff --git a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAIAsyncClientTest.java b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAIAsyncClientTest.java index 7cc7ec3429c6..fb842b09df8a 100644 --- a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAIAsyncClientTest.java +++ b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAIAsyncClientTest.java @@ -4,6 +4,10 @@ package com.azure.ai.openai; import com.azure.ai.openai.functions.MyFunctionCallArguments; +import com.azure.ai.openai.models.AudioTaskLabel; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.ai.openai.models.AudioTranslationOptions; import com.azure.ai.openai.models.ChatChoice; import com.azure.ai.openai.models.ChatCompletions; import com.azure.ai.openai.models.ChatCompletionsOptions; @@ -25,6 +29,7 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import static com.azure.ai.openai.TestUtils.DISPLAY_NAME_WITH_ARGUMENTS; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -322,4 +327,281 @@ public void testCompletionContentFiltering(HttpClient httpClient, OpenAIServiceV }).verifyComplete(); }); } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.JSON); + + StepVerifier.create(client.getAudioTranscription(deploymentName, fileName, transcriptionOptions)) + .assertNext(transcription -> + assertAudioTranscriptionSimpleJson(transcription, BATMAN_TRANSCRIPTION)) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new 
AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON); + + StepVerifier.create(client.getAudioTranscription(deploymentName, fileName, transcriptionOptions)) + .assertNext(transcription -> + assertAudioTranscriptionVerboseJson(transcription, BATMAN_TRANSCRIPTION, AudioTaskLabel.TRANSCRIBE)) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.TEXT); + + StepVerifier.create(client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions)) + .assertNext(transcription -> + // A plain/text request adds a line break as an artifact. Also observed for translations + assertEquals(BATMAN_TRANSCRIPTION + "\n", transcription)) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.SRT); + + StepVerifier.create(client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions)) + .assertNext(translation -> { + // Sequence number + assertTrue(translation.contains("1\n")); + // First sequence starts at timestamp 0 + assertTrue(translation.contains("00:00:00,000 --> ")); + // Contains at least one expected word + assertTrue(translation.contains("Batman")); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VTT); + + StepVerifier.create(client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions)) + .assertNext(translation -> { + // Start value according to spec + assertTrue(translation.startsWith("WEBVTT\n")); + // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "." 
+ assertTrue(translation.contains("00:00:00.000 --> ")); + // Contains at least one expected word + assertTrue(translation.contains("Batman")); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.JSON, + AudioTranscriptionFormat.VERBOSE_JSON + ); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + transcriptionOptions.setResponseFormat(format); + StepVerifier.create(client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions)) + .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException)); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.TEXT, + AudioTranscriptionFormat.SRT, + AudioTranscriptionFormat.VTT + ); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + transcriptionOptions.setResponseFormat(format); + StepVerifier.create(client.getAudioTranscription(modelId, fileName, transcriptionOptions)) + .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException)); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.JSON); + + StepVerifier.create(client.getAudioTranslation(modelId, fileName, translationOptions)) + .assertNext(translation -> + assertAudioTranscriptionSimpleJson(translation, "It's raining today.")) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON); + + 
StepVerifier.create(client.getAudioTranslation(modelId, fileName, translationOptions)) + .assertNext(translation -> + assertAudioTranscriptionVerboseJson(translation, "It's raining today.", AudioTaskLabel.TRANSLATE)) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.TEXT); + + StepVerifier.create(client.getAudioTranslationText(modelId, fileName, translationOptions)) + .assertNext(translation -> { + assertEquals("It's raining today.\n", translation); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.SRT); + + StepVerifier.create(client.getAudioTranslationText(modelId, fileName, translationOptions)) + .assertNext(translation -> { + // Sequence number + assertTrue(translation.contains("1\n")); + // First sequence starts at timestamp 0 + assertTrue(translation.contains("00:00:00,000 --> ")); + // Actual translation value + assertTrue(translation.contains("It's raining today.")); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.VTT); + + StepVerifier.create(client.getAudioTranslationText(modelId, fileName, translationOptions)) + .assertNext(translation -> { + // Start value according to spec + assertTrue(translation.startsWith("WEBVTT\n")); + // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "." 
+ assertTrue(translation.contains("00:00:00.000 --> ")); + // Actual translation value + assertTrue(translation.contains("It's raining today.")); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.JSON, + AudioTranscriptionFormat.VERBOSE_JSON + ); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + translationOptions.setResponseFormat(format); + StepVerifier.create(client.getAudioTranslationText(modelId, fileName, translationOptions)) + .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException)); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAIAsyncClient(httpClient); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.TEXT, + AudioTranscriptionFormat.SRT, + AudioTranscriptionFormat.VTT + ); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + translationOptions.setResponseFormat(format); + StepVerifier.create(client.getAudioTranslation(modelId, fileName, translationOptions)) + .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException)); + } + }); + } } diff --git a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAISyncClientTest.java b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAISyncClientTest.java index 43af2bf9bc43..25055df90aa7 100644 --- a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAISyncClientTest.java +++ b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAISyncClientTest.java @@ -4,6 +4,11 @@ package com.azure.ai.openai; import com.azure.ai.openai.functions.MyFunctionCallArguments; +import com.azure.ai.openai.models.AudioTaskLabel; +import com.azure.ai.openai.models.AudioTranscription; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.ai.openai.models.AudioTranslationOptions; import com.azure.ai.openai.models.ChatChoice; import com.azure.ai.openai.models.ChatCompletions; import com.azure.ai.openai.models.ChatCompletionsOptions; @@ -25,6 +30,7 @@ import org.junit.jupiter.params.provider.MethodSource; import java.util.Arrays; +import java.util.List; import static com.azure.ai.openai.TestUtils.DISPLAY_NAME_WITH_ARGUMENTS; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -274,4 +280,265 @@ public void testCompletionContentFiltering(HttpClient httpClient, OpenAIServiceV assertNull(completions.getChoices().get(0).getContentFilterResults()); }); } + + @ParameterizedTest(name = 
DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription transcription = client.getAudioTranscription(modelId, fileName, transcriptionOptions); + assertAudioTranscriptionSimpleJson(transcription, BATMAN_TRANSCRIPTION); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON); + + AudioTranscription transcription = client.getAudioTranscription(modelId, fileName, transcriptionOptions); + assertAudioTranscriptionVerboseJson(transcription, BATMAN_TRANSCRIPTION, AudioTaskLabel.TRANSCRIBE); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.TEXT); + + String transcription = client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions); + // A plain/text request adds a line break as an artifact. 
Also observed for translations + assertEquals(BATMAN_TRANSCRIPTION + "\n", transcription); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.SRT); + + String transcription = client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions); + // Sequence number + assertTrue(transcription.contains("1\n")); + // First sequence starts at timestamp 0 + assertTrue(transcription.contains("00:00:00,000 --> ")); + // Contains one expected word + assertTrue(transcription.contains("Batman")); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VTT); + + String transcription = client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions); + // Start value according to spec + assertTrue(transcription.startsWith("WEBVTT\n")); + // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "." 
+ assertTrue(transcription.contains("00:00:00.000 --> ")); + // Contains at least one expected word + assertTrue(transcription.contains("Batman")); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.JSON, + AudioTranscriptionFormat.VERBOSE_JSON + ); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + transcriptionOptions.setResponseFormat(format); + assertThrows(IllegalArgumentException.class, () -> { + client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions); + }); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.TEXT, + AudioTranscriptionFormat.SRT, + AudioTranscriptionFormat.VTT + ); + + getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + transcriptionOptions.setResponseFormat(format); + assertThrows(IllegalArgumentException.class, () -> { + client.getAudioTranscription(modelId, fileName, transcriptionOptions); + }); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription translation = client.getAudioTranslation(modelId, fileName, translationOptions); + assertAudioTranscriptionSimpleJson(translation, "It's raining today."); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON); + + AudioTranscription translation = client.getAudioTranslation(modelId, fileName, translationOptions); + assertAudioTranscriptionVerboseJson(translation, "It's raining today.", 
AudioTaskLabel.TRANSLATE); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.TEXT); + + String transcription = client.getAudioTranslationText(modelId, fileName, translationOptions); + assertEquals("It's raining today.\n", transcription); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.SRT); + + String transcription = client.getAudioTranslationText(modelId, fileName, translationOptions); + // Sequence number + assertTrue(transcription.contains("1\n")); + // First sequence starts at timestamp 0 + assertTrue(transcription.contains("00:00:00,000 --> ")); + // Actual translation value + assertTrue(transcription.contains("It's raining today.")); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.VTT); + + String transcription = client.getAudioTranslationText(modelId, fileName, translationOptions); + // Start value according to spec + assertTrue(transcription.startsWith("WEBVTT\n")); + // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "." 
+ assertTrue(transcription.contains("00:00:00.000 --> ")); + // Actual translation value + assertTrue(transcription.contains("It's raining today.")); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.JSON, + AudioTranscriptionFormat.VERBOSE_JSON + ); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + translationOptions.setResponseFormat(format); + assertThrows(IllegalArgumentException.class, () -> { + client.getAudioTranslationText(modelId, fileName, translationOptions); + }); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getNonAzureOpenAISyncClient(httpClient); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.TEXT, + AudioTranscriptionFormat.SRT, + AudioTranscriptionFormat.VTT + ); + + getAudioTranslationRunnerForNonAzure((modelId, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + translationOptions.setResponseFormat(format); + assertThrows(IllegalArgumentException.class, () -> { + client.getAudioTranslation(modelId, fileName, translationOptions); + }); + } + }); + } } diff --git a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIAsyncClientTest.java b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIAsyncClientTest.java index 0c99aa4a6fb4..44987bd6c7c4 100644 --- a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIAsyncClientTest.java +++ b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIAsyncClientTest.java @@ -4,6 +4,10 @@ package com.azure.ai.openai; import com.azure.ai.openai.functions.MyFunctionCallArguments; +import com.azure.ai.openai.models.AudioTaskLabel; +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.ai.openai.models.AudioTranslationOptions; import com.azure.ai.openai.models.AzureChatExtensionConfiguration; import com.azure.ai.openai.models.AzureChatExtensionType; import com.azure.ai.openai.models.AzureCognitiveSearchChatExtensionConfiguration; @@ -31,13 +35,10 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; +import java.util.List; import static com.azure.ai.openai.TestUtils.DISPLAY_NAME_WITH_ARGUMENTS; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertInstanceOf; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; public class OpenAIAsyncClientTest extends OpenAIClientTestBase { 
private OpenAIAsyncClient client; @@ -294,7 +295,7 @@ public void testChatFunctionNotSuppliedByNamePreset(HttpClient httpClient, OpenA @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testChatCompletionContentFiltering(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIAsyncClient(httpClient, serviceVersion); + client = getOpenAIAsyncClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getChatCompletionsContentFilterRunner((modelId, chatMessages) -> { StepVerifier.create(client.getChatCompletions(modelId, new ChatCompletionsOptions(chatMessages))) .assertNext(chatCompletions -> { @@ -310,7 +311,7 @@ public void testChatCompletionContentFiltering(HttpClient httpClient, OpenAIServ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testChatCompletionStreamContentFiltering(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIAsyncClient(httpClient, serviceVersion); + client = getOpenAIAsyncClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getChatCompletionsContentFilterRunner((modelId, chatMessages) -> { StepVerifier.create(client.getChatCompletionsStream(modelId, new ChatCompletionsOptions(chatMessages))) .recordWith(ArrayList::new) @@ -362,7 +363,7 @@ public void testChatCompletionStreamContentFiltering(HttpClient httpClient, Open @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testCompletionContentFiltering(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIAsyncClient(httpClient, serviceVersion); + client = getOpenAIAsyncClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getCompletionsContentFilterRunner((modelId, prompt) -> { CompletionsOptions completionsOptions = new CompletionsOptions(Arrays.asList(prompt)); // work around for this model, there seem to be some issues with Completions in gpt-turbo models @@ -380,7 +381,7 @@ public void testCompletionContentFiltering(HttpClient httpClient, OpenAIServiceV @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testCompletionStreamContentFiltering(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIAsyncClient(httpClient, serviceVersion); + client = getOpenAIAsyncClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getCompletionsContentFilterRunner((modelId, prompt) -> { CompletionsOptions completionsOptions = new CompletionsOptions(Arrays.asList(prompt)); // work around for this model, there seem to be some issues with Completions in gpt-turbo models @@ -427,7 +428,7 @@ public void testCompletionStreamContentFiltering(HttpClient httpClient, OpenAISe @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testChatCompletionsBasicSearchExtension(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIAsyncClient(httpClient, serviceVersion); + client = getOpenAIAsyncClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getChatCompletionsAzureChatSearchRunner((deploymentName, chatCompletionsOptions) -> { AzureCognitiveSearchChatExtensionConfiguration cognitiveSearchConfiguration = @@ -452,7 +453,7 @@ public void 
testChatCompletionsBasicSearchExtension(HttpClient httpClient, OpenA @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testChatCompletionsStreamingBasicSearchExtension(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIAsyncClient(httpClient, serviceVersion); + client = getOpenAIAsyncClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getChatCompletionsAzureChatSearchRunner((deploymentName, chatCompletionsOptions) -> { AzureCognitiveSearchChatExtensionConfiguration cognitiveSearchConfiguration = @@ -475,4 +476,281 @@ public void testChatCompletionsStreamingBasicSearchExtension(HttpClient httpClie .verifyComplete(); }); } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.JSON); + + StepVerifier.create(client.getAudioTranscription(deploymentName, fileName, transcriptionOptions)) + .assertNext(transcription -> + assertAudioTranscriptionSimpleJson(transcription, BATMAN_TRANSCRIPTION)) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON); + + StepVerifier.create(client.getAudioTranscription(deploymentName, fileName, transcriptionOptions)) + .assertNext(transcription -> + assertAudioTranscriptionVerboseJson(transcription, BATMAN_TRANSCRIPTION, AudioTaskLabel.TRANSCRIBE)) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.TEXT); + + StepVerifier.create(client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions)) + .assertNext(transcription -> + // A plain/text request adds a line break as an artifact. 
Also observed for translations + assertEquals(BATMAN_TRANSCRIPTION + "\n", transcription)) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.SRT); + + StepVerifier.create(client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions)) + .assertNext(translation -> { + // 1st Sequence number + assertTrue(translation.contains("1\n")); + // First sequence starts at timestamp 0 + assertTrue(translation.contains("00:00:00,000 --> ")); + // Transcription contains at least one expected word + assertTrue(translation.contains("Batman")); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VTT); + + StepVerifier.create(client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions)) + .assertNext(translation -> { + // Start value according to spec + assertTrue(translation.startsWith("WEBVTT\n")); + // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "." 
+ assertTrue(translation.contains("00:00:00.000 --> ")); + // Transcription contains at least one expected word + assertTrue(translation.contains("Batman")); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.JSON, + AudioTranscriptionFormat.VERBOSE_JSON + ); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + transcriptionOptions.setResponseFormat(format); + StepVerifier.create(client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions)) + .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException)); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.TEXT, + AudioTranscriptionFormat.SRT, + AudioTranscriptionFormat.VTT + ); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + transcriptionOptions.setResponseFormat(format); + StepVerifier.create(client.getAudioTranscription(deploymentName, fileName, transcriptionOptions)) + .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException)); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.JSON); + + StepVerifier.create(client.getAudioTranslation(deploymentName, fileName, translationOptions)) + .assertNext(translation -> + assertAudioTranscriptionSimpleJson(translation, "It's raining today.")) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + 
translationOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON); + + StepVerifier.create(client.getAudioTranslation(deploymentName, fileName, translationOptions)) + .assertNext(translation -> + assertAudioTranscriptionVerboseJson(translation, "It's raining today.", AudioTaskLabel.TRANSLATE)) + .verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.TEXT); + + StepVerifier.create(client.getAudioTranslationText(deploymentName, fileName, translationOptions)) + .assertNext(translation -> { + assertEquals("It's raining today.\n", translation); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.SRT); + + StepVerifier.create(client.getAudioTranslationText(deploymentName, fileName, translationOptions)) + .assertNext(translation -> { + // Sequence number + assertTrue(translation.contains("1\n")); + // First sequence starts at timestamp 0 + assertTrue(translation.contains("00:00:00,000 --> ")); + // Actual translation value + assertTrue(translation.contains("It's raining today.")); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.VTT); + + StepVerifier.create(client.getAudioTranslationText(deploymentName, fileName, translationOptions)) + .assertNext(translation -> { + // Start value according to spec + assertTrue(translation.startsWith("WEBVTT\n")); + // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "." 
+ assertTrue(translation.contains("00:00:00.000 --> ")); + // Actual translation value + assertTrue(translation.contains("It's raining today.")); + }).verifyComplete(); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.JSON, + AudioTranscriptionFormat.VERBOSE_JSON + ); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + translationOptions.setResponseFormat(format); + StepVerifier.create(client.getAudioTranslationText(deploymentName, fileName, translationOptions)) + .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException)); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIAsyncClient(httpClient, serviceVersion); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.TEXT, + AudioTranscriptionFormat.SRT, + AudioTranscriptionFormat.VTT + ); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + translationOptions.setResponseFormat(format); + StepVerifier.create(client.getAudioTranslation(deploymentName, fileName, translationOptions)) + .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException)); + } + }); + } } diff --git a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTestBase.java b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTestBase.java index 9b8cb0014cd0..a3364ee90bdc 100644 --- a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTestBase.java +++ b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTestBase.java @@ -5,6 +5,8 @@ package com.azure.ai.openai; import com.azure.ai.openai.functions.Parameters; +import com.azure.ai.openai.models.AudioTaskLabel; +import com.azure.ai.openai.models.AudioTranscription; import com.azure.ai.openai.models.AzureChatExtensionsMessageContext; import com.azure.ai.openai.models.ChatChoice; import com.azure.ai.openai.models.ChatCompletions; @@ -26,6 +28,8 @@ import com.azure.core.credential.AzureKeyCredential; import com.azure.core.credential.KeyCredential; import com.azure.core.http.HttpClient; +import com.azure.core.http.policy.HttpLogDetailLevel; +import com.azure.core.http.policy.HttpLogOptions; import com.azure.core.http.rest.Response; import com.azure.core.test.TestMode; import com.azure.core.test.TestProxyTestBase; @@ -35,6 +39,8 @@ import com.azure.core.util.Configuration; import org.junit.jupiter.api.Test; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -55,7 +61,7 @@ public abstract class 
OpenAIClientTestBase extends TestProxyTestBase { OpenAIClientBuilder getOpenAIClientBuilder(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { OpenAIClientBuilder builder = new OpenAIClientBuilder() -// .httpLogOptions(new HttpLogOptions().setLogLevel(HttpLogDetailLevel.BODY_AND_HEADERS)) + .httpLogOptions(new HttpLogOptions().setLogLevel(HttpLogDetailLevel.BODY_AND_HEADERS)) .httpClient(httpClient) .serviceVersion(serviceVersion); @@ -122,7 +128,6 @@ protected String getAzureCognitiveSearchKey() { } } - @Test public abstract void testGetCompletions(HttpClient httpClient, OpenAIServiceVersion serviceVersion); @@ -206,6 +211,22 @@ void getCompletionsContentFilterRunnerForNonAzure(BiConsumer tes testRunner.accept("text-davinci-002", "What is 3 times 4?"); } + void getAudioTranscriptionRunner(BiConsumer testRunner) { + testRunner.accept("whisper-deployment", "batman.wav"); + } + + void getAudioTranslationRunner(BiConsumer testRunner) { + testRunner.accept("whisper-deployment", "JP_it_is_rainy_today.wav"); + } + + void getAudioTranscriptionRunnerForNonAzure(BiConsumer testRunner) { + testRunner.accept("whisper-1", "batman.wav"); + } + + void getAudioTranslationRunnerForNonAzure(BiConsumer testRunner) { + testRunner.accept("whisper-1", "JP_it_is_rainy_today.wav"); + } + private List getChatMessages() { List chatMessages = new ArrayList<>(); chatMessages.add(new ChatMessage(ChatRole.SYSTEM, "You are a helpful assistant. You will talk like a pirate.")); @@ -229,6 +250,10 @@ private ChatCompletionsOptions getChatMessagesWithFunction() { return chatCompletionOptions; } + static Path openTestResourceFile(String fileName) { + return Paths.get("src/test/resources/" + fileName); + } + static void assertCompletions(int choicesPerPrompt, Completions actual) { assertCompletions(choicesPerPrompt, "stop", actual); } @@ -413,4 +438,42 @@ static void assertChatCompletionsStreamingCognitiveSearch(Stream { ChatCompletions chatCompletions = client.getChatCompletions(modelId, new ChatCompletionsOptions(chatMessages)); assertSafeContentFilterResults(chatCompletions.getPromptFilterResults().get(0).getContentFilterResults()); @@ -260,7 +266,7 @@ public void testChatCompletionContentFiltering(HttpClient httpClient, OpenAIServ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testChatCompletionStreamContentFiltering(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIClient(httpClient, serviceVersion); + client = getOpenAIClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getChatCompletionsContentFilterRunner((modelId, chatMessages) -> { IterableStream messageList = client.getChatCompletionsStream(modelId, new ChatCompletionsOptions(chatMessages)); @@ -306,7 +312,7 @@ public void testChatCompletionStreamContentFiltering(HttpClient httpClient, Open @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testCompletionContentFiltering(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIClient(httpClient, serviceVersion); + client = getOpenAIClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getCompletionsContentFilterRunner((modelId, prompt) -> { CompletionsOptions completionsOptions = new CompletionsOptions(Arrays.asList(prompt)); // work around for this model, there seem to be some issues with Completions in gpt-turbo models @@ -358,7 +364,7 @@ public void 
testCompletionStreamContentFiltering(HttpClient httpClient, OpenAISe @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testChatCompletionsBasicSearchExtension(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIClient(httpClient, serviceVersion); + client = getOpenAIClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getChatCompletionsAzureChatSearchRunner((deploymentName, chatCompletionsOptions) -> { AzureCognitiveSearchChatExtensionConfiguration cognitiveSearchConfiguration = @@ -382,7 +388,7 @@ public void testChatCompletionsBasicSearchExtension(HttpClient httpClient, OpenA @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") public void testChatCompletionsStreamingBasicSearchExtension(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { - client = getOpenAIClient(httpClient, serviceVersion); + client = getOpenAIClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW); getChatCompletionsAzureChatSearchRunner((deploymentName, chatCompletionsOptions) -> { AzureCognitiveSearchChatExtensionConfiguration cognitiveSearchConfiguration = @@ -402,4 +408,263 @@ public void testChatCompletionsStreamingBasicSearchExtension(HttpClient httpClie assertChatCompletionsStreamingCognitiveSearch(resultChatCompletions.stream()); }); } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription transcription = client.getAudioTranscription(deploymentName, fileName, transcriptionOptions); + assertAudioTranscriptionSimpleJson(transcription, BATMAN_TRANSCRIPTION); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON); + + AudioTranscription transcription = client.getAudioTranscription(deploymentName, fileName, transcriptionOptions); + assertAudioTranscriptionVerboseJson(transcription, BATMAN_TRANSCRIPTION, AudioTaskLabel.TRANSCRIBE); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions 
= new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.TEXT); + + String transcription = client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions); + // A plain/text request adds a line break as an artifact. Also observed for translations + assertEquals(BATMAN_TRANSCRIPTION + "\n", transcription); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.SRT); + + String transcription = client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions); + // Contains at least one sequence + assertTrue(transcription.contains("1\n")); + // First sequence starts at timestamp 0 + assertTrue(transcription.contains("00:00:00,000 --> ")); + // Contains at least one expected word + assertTrue(transcription.contains("Batman")); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VTT); + + String transcription = client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions); + // Start value according to spec + assertTrue(transcription.startsWith("WEBVTT\n")); + // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "." 
+ assertTrue(transcription.contains("00:00:00.000 --> ")); + // Contains at least one expected word in the transcription + assertTrue(transcription.contains("Batman")); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.JSON, + AudioTranscriptionFormat.VERBOSE_JSON + ); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions audioTranscriptionOptions = new AudioTranscriptionOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + audioTranscriptionOptions.setResponseFormat(format); + assertThrows(IllegalArgumentException.class, () -> + client.getAudioTranscriptionText(deploymentName, fileName, audioTranscriptionOptions)); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranscriptionJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.TEXT, + AudioTranscriptionFormat.SRT, + AudioTranscriptionFormat.VTT + ); + + getAudioTranscriptionRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranscriptionOptions audioTranscriptionOptions = new AudioTranscriptionOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + audioTranscriptionOptions.setResponseFormat(format); + assertThrows(IllegalArgumentException.class, () -> + client.getAudioTranscription(deploymentName, fileName, audioTranscriptionOptions)); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.JSON); + + AudioTranscription translation = client.getAudioTranslation(deploymentName, fileName, translationOptions); + assertAudioTranscriptionSimpleJson(translation, "It's raining today."); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON); + + AudioTranscription translation = client.getAudioTranslation(deploymentName, fileName, translationOptions); + 
assertAudioTranscriptionVerboseJson(translation, "It's raining today.", AudioTaskLabel.TRANSLATE); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.TEXT); + + String transcription = client.getAudioTranslationText(deploymentName, fileName, translationOptions); + assertEquals("It's raining today.\n", transcription); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.SRT); + + String transcription = client.getAudioTranslationText(deploymentName, fileName, translationOptions); + // Sequence number + assertTrue(transcription.contains("1\n")); + // First sequence starts at timestamp 0 + assertTrue(transcription.contains("00:00:00,000 --> ")); + // Actual translation value + assertTrue(transcription.contains("It's raining today.")); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setResponseFormat(AudioTranscriptionFormat.VTT); + + String transcription = client.getAudioTranslationText(deploymentName, fileName, translationOptions); + // Start value according to spec + assertTrue(transcription.startsWith("WEBVTT\n")); + // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "." 
+ assertTrue(transcription.contains("00:00:00.000 --> ")); + // Actual translation value + assertTrue(transcription.contains("It's raining today.")); + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.JSON, + AudioTranscriptionFormat.VERBOSE_JSON + ); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + translationOptions.setResponseFormat(format); + assertThrows(IllegalArgumentException.class, () -> { + client.getAudioTranslationText(deploymentName, fileName, translationOptions); + }); + } + }); + } + + @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS) + @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters") + public void testGetAudioTranslationJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) { + client = getOpenAIClient(httpClient, serviceVersion); + List wrongFormats = Arrays.asList( + AudioTranscriptionFormat.TEXT, + AudioTranscriptionFormat.SRT, + AudioTranscriptionFormat.VTT + ); + + getAudioTranslationRunner((deploymentName, fileName) -> { + byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes(); + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + + for (AudioTranscriptionFormat format: wrongFormats) { + translationOptions.setResponseFormat(format); + assertThrows(IllegalArgumentException.class, () -> { + client.getAudioTranslation(deploymentName, fileName, translationOptions); + }); + } + }); + } } diff --git a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/implementation/MultipartDataHelperTest.java b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/implementation/MultipartDataHelperTest.java new file mode 100644 index 000000000000..be4fdadbe97e --- /dev/null +++ b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/implementation/MultipartDataHelperTest.java @@ -0,0 +1,132 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.ai.openai.implementation; + +import com.azure.ai.openai.models.AudioTranscriptionFormat; +import com.azure.ai.openai.models.AudioTranscriptionOptions; +import com.azure.ai.openai.models.AudioTranslationOptions; +import com.azure.ai.openai.models.EmbeddingsOptions; +import org.junit.jupiter.api.Test; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +/** + * Unit tests for {@link MultipartDataHelper} + */ +public class MultipartDataHelperTest { + + private static final String TEST_BOUNDARY = "test-boundary"; + + @Test + public void serializeAudioTranslationOptionsAllFields() { + MultipartDataHelper helper = new MultipartDataHelper(TEST_BOUNDARY); + byte[] file = new byte[] {73, 32, 115, 104, 111, 117, 108, 100, 32, 104, 97, 118, 101, 32, 116, 104, 111, 117, + 103, 104, 116, 32, 111, 102, 32, 97, 32, 103, 111, 111, 100, 32, 101, 97, 115, 116, 101, 114, 32, 101, + 103, 103}; + String fileName = "file_name.wav"; + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + translationOptions.setModel("model_name") + .setPrompt("prompt text") + .setResponseFormat(AudioTranscriptionFormat.TEXT) + .setTemperature(0.1); + MultipartDataSerializationResult actual = helper.serializeRequest(translationOptions, fileName); + + String expected = multipartFileSegment(fileName, file) + + fieldFormData("response_format", "text") + + fieldFormData("model", "model_name") + + fieldFormData("prompt", "prompt text") + + fieldFormData("temperature", "0.1") + + closingMarker(); + + assertEquals(expected, actual.getData().toString()); + assertEquals(expected.getBytes(StandardCharsets.US_ASCII).length, actual.getDataLength()); + } + + @Test + public void serializeAudioTranscriptionOptionsAllFields() { + MultipartDataHelper helper = new MultipartDataHelper(TEST_BOUNDARY); + byte[] file = new byte[] {73, 32, 115, 104, 111, 117, 108, 100, 32, 104, 97, 118, 101, 32, 116, 104, 111, 117, + 103, 104, 116, 32, 111, 102, 32, 97, 32, 103, 111, 111, 100, 32, 101, 97, 115, 116, 101, 114, 32, 101, + 103, 103}; + String fileName = "file_name.wav"; + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + transcriptionOptions.setModel("model_name") + .setPrompt("prompt text") + .setResponseFormat(AudioTranscriptionFormat.TEXT) + .setLanguage("en") + .setTemperature(0.1); + MultipartDataSerializationResult actual = helper.serializeRequest(transcriptionOptions, fileName); + + String expected = multipartFileSegment(fileName, file) + + fieldFormData("response_format", "text") + + fieldFormData("model", "model_name") + + fieldFormData("prompt", "prompt text") + + fieldFormData("temperature", "0.1") + + fieldFormData("language", "en") + + closingMarker(); + + assertEquals(expected, actual.getData().toString()); + assertEquals(expected.getBytes(StandardCharsets.US_ASCII).length, actual.getDataLength()); + } + + @Test + public void serializeAudioTranslationOptionsNoFields() { + MultipartDataHelper helper = new MultipartDataHelper(TEST_BOUNDARY); + byte[] file = new byte[] {}; + String fileName = "file_name.wav"; + AudioTranslationOptions translationOptions = new AudioTranslationOptions(file); + MultipartDataSerializationResult actual = helper.serializeRequest(translationOptions, fileName); + + String expected = multipartFileSegment(fileName, file) + + closingMarker(); + + assertEquals(expected, 
actual.getData().toString()); + assertEquals(expected.getBytes(StandardCharsets.US_ASCII).length, actual.getDataLength()); + } + + @Test + public void serializeAudioTranscriptionOptionsNoFields() { + MultipartDataHelper helper = new MultipartDataHelper(TEST_BOUNDARY); + byte[] file = new byte[] {}; + String fileName = "file_name.wav"; + AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file); + MultipartDataSerializationResult actual = helper.serializeRequest(transcriptionOptions, fileName); + + String expected = multipartFileSegment(fileName, file) + + closingMarker(); + + assertEquals(expected, actual.getData().toString()); + assertEquals(expected.getBytes(StandardCharsets.US_ASCII).length, actual.getDataLength()); + } + + @Test + public void serializeUnsupportedType() { + assertThrows(IllegalArgumentException.class, () -> { + MultipartDataHelper helper = new MultipartDataHelper(TEST_BOUNDARY); + EmbeddingsOptions embeddingsOptions = new EmbeddingsOptions(new ArrayList<>()); + helper.serializeRequest(embeddingsOptions, "path/to/file"); + }); + } + + private static String fieldFormData(String fieldName, String fieldValue) { + return "\r\n--test-boundary" + + "\r\nContent-Disposition: form-data; name=\"" + fieldName + "\"\r\n\r\n" + + fieldValue; + } + + private static String multipartFileSegment(String fileName, byte[] fileBytes) { + return "--test-boundary\r\n" + + "Content-Disposition: form-data; name=\"file\"; filename=\"" + fileName + "\"\r\n" + + "Content-Type: application/octet-stream\r\n\r\n" + + new String(fileBytes, StandardCharsets.US_ASCII); + } + + private static String closingMarker() { + return "\r\n--test-boundary--"; + } +} diff --git a/sdk/openai/azure-ai-openai/src/test/resources/JP_it_is_rainy_today.wav b/sdk/openai/azure-ai-openai/src/test/resources/JP_it_is_rainy_today.wav new file mode 100644 index 000000000000..5970c85ec1cd Binary files /dev/null and b/sdk/openai/azure-ai-openai/src/test/resources/JP_it_is_rainy_today.wav differ diff --git a/sdk/openai/azure-ai-openai/src/test/resources/batman.wav b/sdk/openai/azure-ai-openai/src/test/resources/batman.wav new file mode 100644 index 000000000000..4c0b7248a39c Binary files /dev/null and b/sdk/openai/azure-ai-openai/src/test/resources/batman.wav differ diff --git a/sdk/openai/azure-ai-openai/tsp-location.yaml b/sdk/openai/azure-ai-openai/tsp-location.yaml index 368074679599..bc4052dd97e4 100644 --- a/sdk/openai/azure-ai-openai/tsp-location.yaml +++ b/sdk/openai/azure-ai-openai/tsp-location.yaml @@ -1,5 +1,5 @@ directory: specification/cognitiveservices/OpenAI.Inference additionalDirectories: - specification/cognitiveservices/OpenAI.Authoring -commit: b646a42aa3b7a0ce488d05f1724827ea41d12cf1 +commit: dd2d1e8957ac6654272137e8d5874eacafd80a5f repo: Azure/azure-rest-api-specs
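The `WrongFormats` tests in both the async and sync suites expect an `IllegalArgumentException` whenever a JSON-style response format is passed to the text-returning overloads, or a text-style format to the object-returning ones. The client-side check itself is not part of this diff, so the sketch below is only a guess at the kind of guard those tests imply; the class and method names are hypothetical.

```java
// Hypothetical guard illustrating the behavior the wrong-format tests assert;
// not the actual client code, which is outside this diff.
import com.azure.ai.openai.models.AudioTranscriptionFormat;

import java.util.Arrays;
import java.util.List;

final class AudioFormatGuard {
    // Formats whose payload deserializes into an AudioTranscription object.
    private static final List<AudioTranscriptionFormat> JSON_FORMATS = Arrays.asList(
        AudioTranscriptionFormat.JSON, AudioTranscriptionFormat.VERBOSE_JSON);
    // Formats returned verbatim as a plain string.
    private static final List<AudioTranscriptionFormat> TEXT_FORMATS = Arrays.asList(
        AudioTranscriptionFormat.TEXT, AudioTranscriptionFormat.SRT, AudioTranscriptionFormat.VTT);

    static void validateForJsonOverload(AudioTranscriptionFormat format) {
        if (!JSON_FORMATS.contains(format)) {
            throw new IllegalArgumentException(
                "This overload only accepts JSON or VERBOSE_JSON response formats, got: " + format);
        }
    }

    static void validateForTextOverload(AudioTranscriptionFormat format) {
        if (!TEXT_FORMATS.contains(format)) {
            throw new IllegalArgumentException(
                "This overload only accepts TEXT, SRT or VTT response formats, got: " + format);
        }
    }
}
```

A caller hitting this exception would switch to the matching overload: `getAudioTranscription`/`getAudioTranslation` for JSON and VERBOSE_JSON, or `getAudioTranscriptionText`/`getAudioTranslationText` for TEXT, SRT, and VTT.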