generateImageWithResponse(
*
* @param inputJson JSON submitted by the client
* @param modelId The LLM model ID to be injected in the JSON
- * @return
+ * @return an updated version of the JSON with a "model" key whose value is the supplied {@code modelId}
*/
private static BinaryData addModelIdJson(BinaryData inputJson, String modelId) throws JsonProcessingException {
JsonNode jsonNode = JSON_MAPPER.readTree(inputJson.toString());
@@ -905,4 +1057,446 @@ private static BinaryData addModelIdJson(BinaryData inputJson, String modelId) t
return inputJson;
}
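// Illustrative sketch, not part of this patch: the hunk above elides the body of
// addModelIdJson. Given the readTree(...) call shown and the documented "model"
// injection, a plausible shape is the following; ObjectNode is Jackson's mutable
// object node (com.fasterxml.jackson.databind.node.ObjectNode).
//
//     JsonNode jsonNode = JSON_MAPPER.readTree(inputJson.toString());
//     if (jsonNode instanceof ObjectNode) {
//         ((ObjectNode) jsonNode).put("model", modelId); // inject the model ID
//         return BinaryData.fromBytes(JSON_MAPPER.writeValueAsBytes(jsonNode));
//     }
//     return inputJson; // non-object JSON is returned unchanged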
+
+ /**
+ * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the
+ * written language corresponding to the language it was spoken in.
+ *
+ * <p><strong>Request Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * file: byte[] (Required)
+ * response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+ * language: String (Optional)
+ * prompt: String (Optional)
+ * temperature: Double (Optional)
+ * model: String (Optional)
+ * }
+ * }</pre>
+ *
+ * <p><strong>Response Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * text: String (Required)
+ * task: String(transcribe/translate) (Optional)
+ * language: String (Optional)
+ * duration: Double (Optional)
+ * segments (Optional): [
+ * (Optional){
+ * id: int (Required)
+ * start: double (Required)
+ * end: double (Required)
+ * text: String (Required)
+ * temperature: double (Required)
+ * avg_logprob: double (Required)
+ * compression_ratio: double (Required)
+ * no_speech_prob: double (Required)
+ * tokens (Required): [
+ * int (Required)
+ * ]
+ * seek: int (Required)
+ * }
+ * ]
+ * }
+ * }</pre>
+ *
+ * @param modelId Specifies the model name to use for this request.
+ * @param audioTranscriptionOptions The configuration information for an audio transcription request.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response} on
+ * successful completion of {@link Mono}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Mono<Response<BinaryData>> getAudioTranscriptionAsResponseObjectWithResponseAsync(
+ String modelId, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) {
+ final String accept = "application/json";
+ return FluxUtil.withContext(
+ context ->
+ service.getAudioTranscriptionAsResponseObject(
+ OPEN_AI_ENDPOINT,
+ accept,
+ audioTranscriptionOptions,
+ requestOptions,
+ context));
+ }
+
+ /**
+ * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the
+ * written language corresponding to the language it was spoken in.
+ *
+ * <p><strong>Request Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * file: byte[] (Required)
+ * response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+ * language: String (Optional)
+ * prompt: String (Optional)
+ * temperature: Double (Optional)
+ * model: String (Optional)
+ * }
+ * }</pre>
+ *
+ * <p><strong>Response Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * text: String (Required)
+ * task: String(transcribe/translate) (Optional)
+ * language: String (Optional)
+ * duration: Double (Optional)
+ * segments (Optional): [
+ * (Optional){
+ * id: int (Required)
+ * start: double (Required)
+ * end: double (Required)
+ * text: String (Required)
+ * temperature: double (Required)
+ * avg_logprob: double (Required)
+ * compression_ratio: double (Required)
+ * no_speech_prob: double (Required)
+ * tokens (Required): [
+ * int (Required)
+ * ]
+ * seek: int (Required)
+ * }
+ * ]
+ * }
+ * }</pre>
+ *
+ * @param modelId Specifies the model name to use for this request.
+ * @param audioTranscriptionOptions The configuration information for an audio transcription request.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Response<BinaryData> getAudioTranscriptionAsResponseObjectWithResponse(
+ String modelId, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) {
+ final String accept = "application/json";
+ return service.getAudioTranscriptionAsResponseObjectSync(
+ OPEN_AI_ENDPOINT,
+ accept,
+ audioTranscriptionOptions,
+ requestOptions,
+ Context.NONE);
+ }
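+
+    // Usage sketch, not part of this patch. The helper below is illustrative: audioBytes
+    // and the "whisper-1" model name are assumptions, and for this client the model ID
+    // also travels inside the JSON request body (see addModelIdJson above).
+    AudioTranscription transcribeSketch(byte[] audioBytes) {
+        AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(audioBytes)
+                .setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON);
+        Response<BinaryData> response = getAudioTranscriptionAsResponseObjectWithResponse(
+                "whisper-1", BinaryData.fromObject(transcriptionOptions), new RequestOptions());
+        return response.getValue().toObject(AudioTranscription.class);
+    }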
+
+ /**
+ * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the
+ * written language corresponding to the language it was spoken in.
+ *
+ * <p><strong>Request Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * file: byte[] (Required)
+ * response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+ * language: String (Optional)
+ * prompt: String (Optional)
+ * temperature: Double (Optional)
+ * model: String (Optional)
+ * }
+ * }</pre>
+ *
+ * <p><strong>Response Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * String
+ * }</pre>
+ *
+ * @param modelId Specifies the model name to use for this request.
+ * @param audioTranscriptionOptions The configuration information for an audio transcription request.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response} on
+ * successful completion of {@link Mono}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Mono<Response<BinaryData>> getAudioTranscriptionAsPlainTextWithResponseAsync(
+ String modelId, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) {
+ final String accept = "application/json";
+ return FluxUtil.withContext(
+ context ->
+ service.getAudioTranscriptionAsPlainText(
+ OPEN_AI_ENDPOINT,
+ accept,
+ audioTranscriptionOptions,
+ requestOptions,
+ context));
+ }
+
+ /**
+ * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the
+ * written language corresponding to the language it was spoken in.
+ *
+ * <p><strong>Request Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * file: byte[] (Required)
+ * response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+ * language: String (Optional)
+ * prompt: String (Optional)
+ * temperature: Double (Optional)
+ * model: String (Optional)
+ * }
+ * }</pre>
+ *
+ * <p><strong>Response Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * String
+ * }</pre>
+ *
+ * @param modelId Specifies the model name to use for this request.
+ * @param audioTranscriptionOptions The configuration information for an audio transcription request.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Response<BinaryData> getAudioTranscriptionAsPlainTextWithResponse(
+ String modelId, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) {
+ final String accept = "application/json";
+ return service.getAudioTranscriptionAsPlainTextSync(
+ OPEN_AI_ENDPOINT,
+ accept,
+ audioTranscriptionOptions,
+ requestOptions,
+ Context.NONE);
+ }
+
+ /**
+ * Gets English language transcribed text and associated metadata from provided spoken audio data.
+ *
+ * <p><strong>Request Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * file: byte[] (Required)
+ * response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+ * prompt: String (Optional)
+ * temperature: Double (Optional)
+ * model: String (Optional)
+ * }
+ * }</pre>
+ *
+ * <p><strong>Response Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * text: String (Required)
+ * task: String(transcribe/translate) (Optional)
+ * language: String (Optional)
+ * duration: Double (Optional)
+ * segments (Optional): [
+ * (Optional){
+ * id: int (Required)
+ * start: double (Required)
+ * end: double (Required)
+ * text: String (Required)
+ * temperature: double (Required)
+ * avg_logprob: double (Required)
+ * compression_ratio: double (Required)
+ * no_speech_prob: double (Required)
+ * tokens (Required): [
+ * int (Required)
+ * ]
+ * seek: int (Required)
+ * }
+ * ]
+ * }
+ * }</pre>
+ *
+ * @param deploymentOrModelName Specifies the model name to use for this request.
+ * @param audioTranslationOptions The configuration information for an audio translation request.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return English language transcribed text and associated metadata from provided spoken audio data along with
+ * {@link Response} on successful completion of {@link Mono}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Mono<Response<BinaryData>> getAudioTranslationAsResponseObjectWithResponseAsync(
+ String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) {
+ final String accept = "application/json";
+ return FluxUtil.withContext(
+ context ->
+ service.getAudioTranslationAsResponseObject(
+ OPEN_AI_ENDPOINT,
+ accept,
+ audioTranslationOptions,
+ requestOptions,
+ context));
+ }
+
+ /**
+ * Gets English language transcribed text and associated metadata from provided spoken audio data.
+ *
+ * <p><strong>Request Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * file: byte[] (Required)
+ * response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+ * prompt: String (Optional)
+ * temperature: Double (Optional)
+ * model: String (Optional)
+ * }
+ * }</pre>
+ *
+ * <p><strong>Response Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * text: String (Required)
+ * task: String(transcribe/translate) (Optional)
+ * language: String (Optional)
+ * duration: Double (Optional)
+ * segments (Optional): [
+ * (Optional){
+ * id: int (Required)
+ * start: double (Required)
+ * end: double (Required)
+ * text: String (Required)
+ * temperature: double (Required)
+ * avg_logprob: double (Required)
+ * compression_ratio: double (Required)
+ * no_speech_prob: double (Required)
+ * tokens (Required): [
+ * int (Required)
+ * ]
+ * seek: int (Required)
+ * }
+ * ]
+ * }
+ * }</pre>
+ *
+ * @param modelId Specifies the model name to use for this request.
+ * @param audioTranslationOptions The configuration information for an audio translation request.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return English language transcribed text and associated metadata from provided spoken audio data along with
+ * {@link Response}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Response<BinaryData> getAudioTranslationAsResponseObjectWithResponse(
+ String modelId, BinaryData audioTranslationOptions, RequestOptions requestOptions) {
+ final String accept = "application/json";
+ return service.getAudioTranslationAsResponseObjectSync(
+ OPEN_AI_ENDPOINT,
+ accept,
+ audioTranslationOptions,
+ requestOptions,
+ Context.NONE);
+ }
+
+ /**
+ * Gets English language transcribed text and associated metadata from provided spoken audio data.
+ *
+ * <p><strong>Request Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * file: byte[] (Required)
+ * response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+ * prompt: String (Optional)
+ * temperature: Double (Optional)
+ * model: String (Optional)
+ * }
+ * }</pre>
+ *
+ * <p><strong>Response Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * String
+ * }</pre>
+ *
+ * @param modelId Specifies the model name to use for this request.
+ * @param audioTranslationOptions The configuration information for an audio translation request.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return English language transcribed text and associated metadata from provided spoken audio data along with
+ * {@link Response} on successful completion of {@link Mono}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Mono<Response<BinaryData>> getAudioTranslationAsPlainTextWithResponseAsync(
+ String modelId, BinaryData audioTranslationOptions, RequestOptions requestOptions) {
+ final String accept = "application/json";
+ return FluxUtil.withContext(
+ context ->
+ service.getAudioTranslationAsPlainText(
+ OPEN_AI_ENDPOINT,
+ accept,
+ audioTranslationOptions,
+ requestOptions,
+ context));
+ }
+
+ /**
+ * Gets English language transcribed text and associated metadata from provided spoken audio data.
+ *
+ * <p><strong>Request Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * file: byte[] (Required)
+ * response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+ * prompt: String (Optional)
+ * temperature: Double (Optional)
+ * model: String (Optional)
+ * }
+ * }</pre>
+ *
+ * <p><strong>Response Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * String
+ * }</pre>
+ *
+ * @param modelId Specifies the model name to use for this request.
+ * @param audioTranslationOptions The configuration information for an audio translation request.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return English language transcribed text and associated metadata from provided spoken audio data along with
+ * {@link Response}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Response<BinaryData> getAudioTranslationAsPlainTextWithResponse(
+ String modelId, BinaryData audioTranslationOptions, RequestOptions requestOptions) {
+ final String accept = "application/json";
+ return service.getAudioTranslationAsPlainTextSync(
+ OPEN_AI_ENDPOINT,
+ accept,
+ audioTranslationOptions,
+ requestOptions,
+ Context.NONE);
+ }
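+
+    // Usage sketch, not part of this patch: with a text-style response_format the body
+    // is a bare string rather than the JSON object documented for the verbose variants.
+    // AudioTranslationOptions and AudioTranslationFormat are assumed here as the
+    // translation analogues of the transcription models added elsewhere in this patch.
+    String translateToEnglishSketch(byte[] audioBytes) {
+        AudioTranslationOptions translationOptions = new AudioTranslationOptions(audioBytes)
+                .setResponseFormat(AudioTranslationFormat.TEXT);
+        Response<BinaryData> response = getAudioTranslationAsPlainTextWithResponse(
+                "whisper-1", BinaryData.fromObject(translationOptions), new RequestOptions());
+        return response.getValue().toString();
+    }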
}
diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/OpenAIClientImpl.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/OpenAIClientImpl.java
index d74f569bcc04..73beb5f1faa2 100644
--- a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/OpenAIClientImpl.java
+++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/implementation/OpenAIClientImpl.java
@@ -360,6 +360,182 @@ Response<BinaryData> beginAzureBatchImageGenerationSync(
@BodyParam("application/json") BinaryData imageGenerationOptions,
RequestOptions requestOptions,
Context context);
+
+ @Post("/deployments/{deploymentId}/audio/transcriptions")
+ @ExpectedResponses({200})
+ @UnexpectedResponseExceptionType(
+ value = ClientAuthenticationException.class,
+ code = {401})
+ @UnexpectedResponseExceptionType(
+ value = ResourceNotFoundException.class,
+ code = {404})
+ @UnexpectedResponseExceptionType(
+ value = ResourceModifiedException.class,
+ code = {409})
+ @UnexpectedResponseExceptionType(HttpResponseException.class)
+ Mono<Response<BinaryData>> getAudioTranscriptionAsPlainText(
+ @HostParam("endpoint") String endpoint,
+ @QueryParam("api-version") String apiVersion,
+ @PathParam("deploymentId") String deploymentOrModelName,
+ @HeaderParam("accept") String accept,
+ @BodyParam("application/json") BinaryData audioTranscriptionOptions,
+ RequestOptions requestOptions,
+ Context context);
+
+ @Post("/deployments/{deploymentId}/audio/transcriptions")
+ @ExpectedResponses({200})
+ @UnexpectedResponseExceptionType(
+ value = ClientAuthenticationException.class,
+ code = {401})
+ @UnexpectedResponseExceptionType(
+ value = ResourceNotFoundException.class,
+ code = {404})
+ @UnexpectedResponseExceptionType(
+ value = ResourceModifiedException.class,
+ code = {409})
+ @UnexpectedResponseExceptionType(HttpResponseException.class)
+ Response<BinaryData> getAudioTranscriptionAsPlainTextSync(
+ @HostParam("endpoint") String endpoint,
+ @QueryParam("api-version") String apiVersion,
+ @PathParam("deploymentId") String deploymentOrModelName,
+ @HeaderParam("accept") String accept,
+ @BodyParam("application/json") BinaryData audioTranscriptionOptions,
+ RequestOptions requestOptions,
+ Context context);
+
+ // @Multipart not supported by RestProxy
+ @Post("/deployments/{deploymentId}/audio/transcriptions")
+ @ExpectedResponses({200})
+ @UnexpectedResponseExceptionType(
+ value = ClientAuthenticationException.class,
+ code = {401})
+ @UnexpectedResponseExceptionType(
+ value = ResourceNotFoundException.class,
+ code = {404})
+ @UnexpectedResponseExceptionType(
+ value = ResourceModifiedException.class,
+ code = {409})
+ @UnexpectedResponseExceptionType(HttpResponseException.class)
+ Mono<Response<BinaryData>> getAudioTranscriptionAsResponseObject(
+ @HostParam("endpoint") String endpoint,
+ @QueryParam("api-version") String apiVersion,
+ @PathParam("deploymentId") String deploymentOrModelName,
+ @HeaderParam("content-type") String contentType,
+ @HeaderParam("accept") String accept,
+ @BodyParam("multipart/form-data") BinaryData audioTranscriptionOptions,
+ RequestOptions requestOptions,
+ Context context);
+
+ // @Multipart not supported by RestProxy
+ @Post("/deployments/{deploymentId}/audio/transcriptions")
+ @ExpectedResponses({200})
+ @UnexpectedResponseExceptionType(
+ value = ClientAuthenticationException.class,
+ code = {401})
+ @UnexpectedResponseExceptionType(
+ value = ResourceNotFoundException.class,
+ code = {404})
+ @UnexpectedResponseExceptionType(
+ value = ResourceModifiedException.class,
+ code = {409})
+ @UnexpectedResponseExceptionType(HttpResponseException.class)
+ Response<BinaryData> getAudioTranscriptionAsResponseObjectSync(
+ @HostParam("endpoint") String endpoint,
+ @QueryParam("api-version") String apiVersion,
+ @PathParam("deploymentId") String deploymentOrModelName,
+ @HeaderParam("content-type") String contentType,
+ @HeaderParam("accept") String accept,
+ @BodyParam("multipart/form-data") BinaryData audioTranscriptionOptions,
+ RequestOptions requestOptions,
+ Context context);
+
+ @Post("/deployments/{deploymentId}/audio/translations")
+ @ExpectedResponses({200})
+ @UnexpectedResponseExceptionType(
+ value = ClientAuthenticationException.class,
+ code = {401})
+ @UnexpectedResponseExceptionType(
+ value = ResourceNotFoundException.class,
+ code = {404})
+ @UnexpectedResponseExceptionType(
+ value = ResourceModifiedException.class,
+ code = {409})
+ @UnexpectedResponseExceptionType(HttpResponseException.class)
+ Mono<Response<BinaryData>> getAudioTranslationAsPlainText(
+ @HostParam("endpoint") String endpoint,
+ @QueryParam("api-version") String apiVersion,
+ @PathParam("deploymentId") String deploymentOrModelName,
+ @HeaderParam("accept") String accept,
+ @BodyParam("application/json") BinaryData audioTranslationOptions,
+ RequestOptions requestOptions,
+ Context context);
+
+ @Post("/deployments/{deploymentId}/audio/translations")
+ @ExpectedResponses({200})
+ @UnexpectedResponseExceptionType(
+ value = ClientAuthenticationException.class,
+ code = {401})
+ @UnexpectedResponseExceptionType(
+ value = ResourceNotFoundException.class,
+ code = {404})
+ @UnexpectedResponseExceptionType(
+ value = ResourceModifiedException.class,
+ code = {409})
+ @UnexpectedResponseExceptionType(HttpResponseException.class)
+ Response<BinaryData> getAudioTranslationAsPlainTextSync(
+ @HostParam("endpoint") String endpoint,
+ @QueryParam("api-version") String apiVersion,
+ @PathParam("deploymentId") String deploymentOrModelName,
+ @HeaderParam("accept") String accept,
+ @BodyParam("application/json") BinaryData audioTranslationOptions,
+ RequestOptions requestOptions,
+ Context context);
+
+ // @Multipart not supported by RestProxy
+ @Post("/deployments/{deploymentId}/audio/translations")
+ @ExpectedResponses({200})
+ @UnexpectedResponseExceptionType(
+ value = ClientAuthenticationException.class,
+ code = {401})
+ @UnexpectedResponseExceptionType(
+ value = ResourceNotFoundException.class,
+ code = {404})
+ @UnexpectedResponseExceptionType(
+ value = ResourceModifiedException.class,
+ code = {409})
+ @UnexpectedResponseExceptionType(HttpResponseException.class)
+ Mono<Response<BinaryData>> getAudioTranslationAsResponseObject(
+ @HostParam("endpoint") String endpoint,
+ @QueryParam("api-version") String apiVersion,
+ @PathParam("deploymentId") String deploymentOrModelName,
+ @HeaderParam("content-type") String contentType,
+ @HeaderParam("accept") String accept,
+ @BodyParam("multipart/form-data") BinaryData audioTranslationOptions,
+ RequestOptions requestOptions,
+ Context context);
+
+ // @Multipart not supported by RestProxy
+ @Post("/deployments/{deploymentId}/audio/translations")
+ @ExpectedResponses({200})
+ @UnexpectedResponseExceptionType(
+ value = ClientAuthenticationException.class,
+ code = {401})
+ @UnexpectedResponseExceptionType(
+ value = ResourceNotFoundException.class,
+ code = {404})
+ @UnexpectedResponseExceptionType(
+ value = ResourceModifiedException.class,
+ code = {409})
+ @UnexpectedResponseExceptionType(HttpResponseException.class)
+ Response<BinaryData> getAudioTranslationAsResponseObjectSync(
+ @HostParam("endpoint") String endpoint,
+ @QueryParam("api-version") String apiVersion,
+ @PathParam("deploymentId") String deploymentOrModelName,
+ @HeaderParam("content-type") String contentType,
+ @HeaderParam("accept") String accept,
+ @BodyParam("multipart/form-data") BinaryData audioTranslationOptions,
+ RequestOptions requestOptions,
+ Context context);
}
/**
@@ -537,6 +713,18 @@ public Response<BinaryData> getEmbeddingsWithResponse(
* violence (Optional): (recursive schema, see violence above)
* hate (Optional): (recursive schema, see hate above)
* self_harm (Optional): (recursive schema, see self_harm above)
+ * error (Optional): {
+ * code: String (Required)
+ * message: String (Required)
+ * target: String (Optional)
+ * details (Optional): [
+ * (recursive schema, see above)
+ * ]
+ * innererror (Optional): {
+ * code: String (Optional)
+ * innererror (Optional): (recursive schema, see innererror above)
+ * }
+ * }
* }
* }
* ]
@@ -650,6 +838,18 @@ public Mono<Response<BinaryData>> getCompletionsWithResponseAsync(
* violence (Optional): (recursive schema, see violence above)
* hate (Optional): (recursive schema, see hate above)
* self_harm (Optional): (recursive schema, see self_harm above)
+ * error (Optional): {
+ * code: String (Required)
+ * message: String (Required)
+ * target: String (Optional)
+ * details (Optional): [
+ * (recursive schema, see above)
+ * ]
+ * innererror (Optional): {
+ * code: String (Optional)
+ * innererror (Optional): (recursive schema, see innererror above)
+ * }
+ * }
* }
* }
* ]
@@ -800,6 +1000,18 @@ public Response<BinaryData> getCompletionsWithResponse(
* violence (Optional): (recursive schema, see violence above)
* hate (Optional): (recursive schema, see hate above)
* self_harm (Optional): (recursive schema, see self_harm above)
+ * error (Optional): {
+ * code: String (Required)
+ * message: String (Required)
+ * target: String (Optional)
+ * details (Optional): [
+ * (recursive schema, see above)
+ * ]
+ * innererror (Optional): {
+ * code: String (Optional)
+ * innererror (Optional): (recursive schema, see innererror above)
+ * }
+ * }
* }
* }
* ]
@@ -935,6 +1147,18 @@ public Mono<Response<BinaryData>> getChatCompletionsWithResponseAsync(
* violence (Optional): (recursive schema, see violence above)
* hate (Optional): (recursive schema, see hate above)
* self_harm (Optional): (recursive schema, see self_harm above)
+ * error (Optional): {
+ * code: String (Required)
+ * message: String (Required)
+ * target: String (Optional)
+ * details (Optional): [
+ * (recursive schema, see above)
+ * ]
+ * innererror (Optional): {
+ * code: String (Optional)
+ * innererror (Optional): (recursive schema, see innererror above)
+ * }
+ * }
* }
* }
* ]
@@ -1068,6 +1292,18 @@ public Response<BinaryData> getChatCompletionsWithResponse(
* violence (Optional): (recursive schema, see violence above)
* hate (Optional): (recursive schema, see hate above)
* self_harm (Optional): (recursive schema, see self_harm above)
+ * error (Optional): {
+ * code: String (Required)
+ * message: String (Required)
+ * target: String (Optional)
+ * details (Optional): [
+ * (recursive schema, see above)
+ * ]
+ * innererror (Optional): {
+ * code: String (Optional)
+ * innererror (Optional): (recursive schema, see innererror above)
+ * }
+ * }
* }
* }
* ]
@@ -1204,6 +1440,18 @@ public Mono<Response<BinaryData>> getChatCompletionsWithAzureExtensionsWithRespo
* violence (Optional): (recursive schema, see violence above)
* hate (Optional): (recursive schema, see hate above)
* self_harm (Optional): (recursive schema, see self_harm above)
+ * error (Optional): {
+ * code: String (Required)
+ * message: String (Required)
+ * target: String (Optional)
+ * details (Optional): [
+ * (recursive schema, see above)
+ * ]
+ * innererror (Optional): {
+ * code: String (Optional)
+ * innererror (Optional): (recursive schema, see innererror above)
+ * }
+ * }
* }
* }
* ]
@@ -1509,4 +1757,478 @@ public SyncPoller<BinaryData, BinaryData> beginBeginAzureBatchImageGeneration(
TypeReference.createInstance(BinaryData.class),
TypeReference.createInstance(BinaryData.class));
}
+
+ /**
+ * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the
+ * written language corresponding to the language it was spoken in.
+ *
+ * <p><strong>Request Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * file: byte[] (Required)
+ * response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+ * language: String (Optional)
+ * prompt: String (Optional)
+ * temperature: Double (Optional)
+ * model: String (Optional)
+ * }
+ * }</pre>
+ *
+ * <p><strong>Response Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * String
+ * }</pre>
+ *
+ * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name
+ * (when using non-Azure OpenAI) to use for this request.
+ * @param audioTranscriptionOptions The configuration information for an audio transcription request.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response} on
+ * successful completion of {@link Mono}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Mono<Response<BinaryData>> getAudioTranscriptionAsPlainTextWithResponseAsync(
+ String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) {
+ final String accept = "application/json";
+ return FluxUtil.withContext(
+ context ->
+ service.getAudioTranscriptionAsPlainText(
+ this.getEndpoint(),
+ this.getServiceVersion().getVersion(),
+ deploymentOrModelName,
+ accept,
+ audioTranscriptionOptions,
+ requestOptions,
+ context));
+ }
+
+ /**
+ * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the
+ * written language corresponding to the language it was spoken in.
+ *
+ * <p><strong>Request Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * file: byte[] (Required)
+ * response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+ * language: String (Optional)
+ * prompt: String (Optional)
+ * temperature: Double (Optional)
+ * model: String (Optional)
+ * }
+ * }</pre>
+ *
+ * <p><strong>Response Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * String
+ * }</pre>
+ *
+ * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name
+ * (when using non-Azure OpenAI) to use for this request.
+ * @param audioTranscriptionOptions The configuration information for an audio transcription request.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Response<BinaryData> getAudioTranscriptionAsPlainTextWithResponse(
+ String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) {
+ final String accept = "application/json";
+ return service.getAudioTranscriptionAsPlainTextSync(
+ this.getEndpoint(),
+ this.getServiceVersion().getVersion(),
+ deploymentOrModelName,
+ accept,
+ audioTranscriptionOptions,
+ requestOptions,
+ Context.NONE);
+ }
+
+ /**
+ * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the
+ * written language corresponding to the language it was spoken in.
+ *
+ * <p><strong>Request Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * file: byte[] (Required)
+ * response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+ * language: String (Optional)
+ * prompt: String (Optional)
+ * temperature: Double (Optional)
+ * model: String (Optional)
+ * }
+ * }</pre>
+ *
+ * <p><strong>Response Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * text: String (Required)
+ * task: String(transcribe/translate) (Optional)
+ * language: String (Optional)
+ * duration: Double (Optional)
+ * segments (Optional): [
+ * (Optional){
+ * id: int (Required)
+ * start: double (Required)
+ * end: double (Required)
+ * text: String (Required)
+ * temperature: double (Required)
+ * avg_logprob: double (Required)
+ * compression_ratio: double (Required)
+ * no_speech_prob: double (Required)
+ * tokens (Required): [
+ * int (Required)
+ * ]
+ * seek: int (Required)
+ * }
+ * ]
+ * }
+ * }</pre>
+ *
+ * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name
+ * (when using non-Azure OpenAI) to use for this request.
+ * @param audioTranscriptionOptions The configuration information for an audio transcription request.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response} on
+ * successful completion of {@link Mono}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Mono<Response<BinaryData>> getAudioTranscriptionAsResponseObjectWithResponseAsync(
+ String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) {
+ final String contentType = "multipart/form-data";
+ final String accept = "application/json";
+ return FluxUtil.withContext(
+ context ->
+ service.getAudioTranscriptionAsResponseObject(
+ this.getEndpoint(),
+ this.getServiceVersion().getVersion(),
+ deploymentOrModelName,
+ contentType,
+ accept,
+ audioTranscriptionOptions,
+ requestOptions,
+ context));
+ }
+
+ /**
+ * Gets transcribed text and associated metadata from provided spoken audio data. Audio will be transcribed in the
+ * written language corresponding to the language it was spoken in.
+ *
+ * <p><strong>Request Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * file: byte[] (Required)
+ * response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+ * language: String (Optional)
+ * prompt: String (Optional)
+ * temperature: Double (Optional)
+ * model: String (Optional)
+ * }
+ * }</pre>
+ *
+ * <p><strong>Response Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * text: String (Required)
+ * task: String(transcribe/translate) (Optional)
+ * language: String (Optional)
+ * duration: Double (Optional)
+ * segments (Optional): [
+ * (Optional){
+ * id: int (Required)
+ * start: double (Required)
+ * end: double (Required)
+ * text: String (Required)
+ * temperature: double (Required)
+ * avg_logprob: double (Required)
+ * compression_ratio: double (Required)
+ * no_speech_prob: double (Required)
+ * tokens (Required): [
+ * int (Required)
+ * ]
+ * seek: int (Required)
+ * }
+ * ]
+ * }
+ * }</pre>
+ *
+ * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name
+ * (when using non-Azure OpenAI) to use for this request.
+ * @param audioTranscriptionOptions The configuration information for an audio transcription request.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return transcribed text and associated metadata from provided spoken audio data along with {@link Response}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Response<BinaryData> getAudioTranscriptionAsResponseObjectWithResponse(
+ String deploymentOrModelName, BinaryData audioTranscriptionOptions, RequestOptions requestOptions) {
+ final String contentType = "multipart/form-data";
+ final String accept = "application/json";
+ return service.getAudioTranscriptionAsResponseObjectSync(
+ this.getEndpoint(),
+ this.getServiceVersion().getVersion(),
+ deploymentOrModelName,
+ contentType,
+ accept,
+ audioTranscriptionOptions,
+ requestOptions,
+ Context.NONE);
+ }
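+
+    // Usage sketch, not part of this patch: on the Azure-style path the request routes
+    // by deployment name and api-version (see the service interface above) instead of a
+    // "model" field in the body. The deployment name below is an illustrative assumption.
+    AudioTranscription transcribeViaDeploymentSketch(byte[] audioBytes) {
+        AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(audioBytes)
+                .setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON);
+        Response<BinaryData> response = getAudioTranscriptionAsResponseObjectWithResponse(
+                "my-whisper-deployment", BinaryData.fromObject(transcriptionOptions), new RequestOptions());
+        return response.getValue().toObject(AudioTranscription.class);
+    }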
+
+ /**
+ * Gets English language transcribed text and associated metadata from provided spoken audio data.
+ *
+ * <p><strong>Request Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * file: byte[] (Required)
+ * response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+ * prompt: String (Optional)
+ * temperature: Double (Optional)
+ * model: String (Optional)
+ * }
+ * }</pre>
+ *
+ * <p><strong>Response Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * String
+ * }</pre>
+ *
+ * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name
+ * (when using non-Azure OpenAI) to use for this request.
+ * @param audioTranslationOptions The configuration information for an audio translation request.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return English language transcribed text and associated metadata from provided spoken audio data along with
+ * {@link Response} on successful completion of {@link Mono}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Mono<Response<BinaryData>> getAudioTranslationAsPlainTextWithResponseAsync(
+ String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) {
+ final String accept = "application/json";
+ return FluxUtil.withContext(
+ context ->
+ service.getAudioTranslationAsPlainText(
+ this.getEndpoint(),
+ this.getServiceVersion().getVersion(),
+ deploymentOrModelName,
+ accept,
+ audioTranslationOptions,
+ requestOptions,
+ context));
+ }
+
+ /**
+ * Gets English language transcribed text and associated metadata from provided spoken audio data.
+ *
+ * <p><strong>Request Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * file: byte[] (Required)
+ * response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+ * prompt: String (Optional)
+ * temperature: Double (Optional)
+ * model: String (Optional)
+ * }
+ * }</pre>
+ *
+ * <p><strong>Response Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * String
+ * }</pre>
+ *
+ * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name
+ * (when using non-Azure OpenAI) to use for this request.
+ * @param audioTranslationOptions The configuration information for an audio translation request.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return English language transcribed text and associated metadata from provided spoken audio data along with
+ * {@link Response}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Response<BinaryData> getAudioTranslationAsPlainTextWithResponse(
+ String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) {
+ final String accept = "application/json";
+ return service.getAudioTranslationAsPlainTextSync(
+ this.getEndpoint(),
+ this.getServiceVersion().getVersion(),
+ deploymentOrModelName,
+ accept,
+ audioTranslationOptions,
+ requestOptions,
+ Context.NONE);
+ }
+
+ /**
+ * Gets English language transcribed text and associated metadata from provided spoken audio data.
+ *
+ * <p><strong>Request Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * file: byte[] (Required)
+ * response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+ * prompt: String (Optional)
+ * temperature: Double (Optional)
+ * model: String (Optional)
+ * }
+ * }</pre>
+ *
+ * <p><strong>Response Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * text: String (Required)
+ * task: String(transcribe/translate) (Optional)
+ * language: String (Optional)
+ * duration: Double (Optional)
+ * segments (Optional): [
+ * (Optional){
+ * id: int (Required)
+ * start: double (Required)
+ * end: double (Required)
+ * text: String (Required)
+ * temperature: double (Required)
+ * avg_logprob: double (Required)
+ * compression_ratio: double (Required)
+ * no_speech_prob: double (Required)
+ * tokens (Required): [
+ * int (Required)
+ * ]
+ * seek: int (Required)
+ * }
+ * ]
+ * }
+ * }</pre>
+ *
+ * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name
+ * (when using non-Azure OpenAI) to use for this request.
+ * @param audioTranslationOptions The configuration information for an audio translation request.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return English language transcribed text and associated metadata from provided spoken audio data along with
+ * {@link Response} on successful completion of {@link Mono}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Mono<Response<BinaryData>> getAudioTranslationAsResponseObjectWithResponseAsync(
+ String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) {
+ final String contentType = "multipart/form-data";
+ final String accept = "application/json";
+ return FluxUtil.withContext(
+ context ->
+ service.getAudioTranslationAsResponseObject(
+ this.getEndpoint(),
+ this.getServiceVersion().getVersion(),
+ deploymentOrModelName,
+ contentType,
+ accept,
+ audioTranslationOptions,
+ requestOptions,
+ context));
+ }
+
+ /**
+ * Gets English language transcribed text and associated metadata from provided spoken audio data.
+ *
+ * <p><strong>Request Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * file: byte[] (Required)
+ * response_format: String(json/verbose_json/text/srt/vtt) (Optional)
+ * prompt: String (Optional)
+ * temperature: Double (Optional)
+ * model: String (Optional)
+ * }
+ * }</pre>
+ *
+ * <p><strong>Response Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * text: String (Required)
+ * task: String(transcribe/translate) (Optional)
+ * language: String (Optional)
+ * duration: Double (Optional)
+ * segments (Optional): [
+ * (Optional){
+ * id: int (Required)
+ * start: double (Required)
+ * end: double (Required)
+ * text: String (Required)
+ * temperature: double (Required)
+ * avg_logprob: double (Required)
+ * compression_ratio: double (Required)
+ * no_speech_prob: double (Required)
+ * tokens (Required): [
+ * int (Required)
+ * ]
+ * seek: int (Required)
+ * }
+ * ]
+ * }
+ * }</pre>
+ *
+ * @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name
+ * (when using non-Azure OpenAI) to use for this request.
+ * @param audioTranslationOptions The configuration information for an audio translation request.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return English language transcribed text and associated metadata from provided spoken audio data along with
+ * {@link Response}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Response<BinaryData> getAudioTranslationAsResponseObjectWithResponse(
+ String deploymentOrModelName, BinaryData audioTranslationOptions, RequestOptions requestOptions) {
+ final String contentType = "multipart/form-data";
+ final String accept = "application/json";
+ return service.getAudioTranslationAsResponseObjectSync(
+ this.getEndpoint(),
+ this.getServiceVersion().getVersion(),
+ deploymentOrModelName,
+ contentType,
+ accept,
+ audioTranslationOptions,
+ requestOptions,
+ Context.NONE);
+ }
}
diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTaskLabel.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTaskLabel.java
new file mode 100644
index 000000000000..36f8361ad2a4
--- /dev/null
+++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTaskLabel.java
@@ -0,0 +1,50 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) AutoRest Code Generator.
+package com.azure.ai.openai.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.util.ExpandableStringEnum;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import java.util.Collection;
+
+/** Defines the possible descriptors for available audio operation responses. */
+public final class AudioTaskLabel extends ExpandableStringEnum<AudioTaskLabel> {
+
+ /** Accompanying response data resulted from an audio transcription task. */
+ @Generated public static final AudioTaskLabel TRANSCRIBE = fromString("transcribe");
+
+ /** Accompanying response data resulted from an audio translation task. */
+ @Generated public static final AudioTaskLabel TRANSLATE = fromString("translate");
+
+ /**
+ * Creates a new instance of AudioTaskLabel value.
+ *
+ * @deprecated Use the {@link #fromString(String)} factory method.
+ */
+ @Generated
+ @Deprecated
+ public AudioTaskLabel() {}
+
+ /**
+ * Creates or finds an AudioTaskLabel from its string representation.
+ *
+ * @param name a name to look for.
+ * @return the corresponding AudioTaskLabel.
+ */
+ @Generated
+ @JsonCreator
+ public static AudioTaskLabel fromString(String name) {
+ return fromString(name, AudioTaskLabel.class);
+ }
+
+ /**
+ * Gets known AudioTaskLabel values.
+ *
+ * @return known AudioTaskLabel values.
+ */
+ @Generated
+ public static Collection<AudioTaskLabel> values() {
+ return values(AudioTaskLabel.class);
+ }
+}
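// Usage sketch, not part of this patch: ExpandableStringEnum values are cached per
// name, so a parsed known value is the same instance as the constant, and unknown
// future service values parse successfully instead of throwing.
//
//     AudioTaskLabel task = AudioTaskLabel.fromString("transcribe");
//     boolean isTranscription = (task == AudioTaskLabel.TRANSCRIBE); // true: same cached instance
//     AudioTaskLabel future = AudioTaskLabel.fromString("summarize"); // hypothetical value, tolerated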
diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscription.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscription.java
new file mode 100644
index 000000000000..8d7b085ce8af
--- /dev/null
+++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscription.java
@@ -0,0 +1,119 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) AutoRest Code Generator.
+package com.azure.ai.openai.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import java.time.Duration;
+import java.util.List;
+
+/** Result information for an operation that transcribed spoken audio into written text. */
+@Immutable
+public final class AudioTranscription {
+
+ /*
+ * The transcribed text for the provided audio data.
+ */
+ @Generated
+ @JsonProperty(value = "text")
+ private String text;
+
+ /*
+ * The label that describes which operation type generated the accompanying response data.
+ */
+ @Generated
+ @JsonProperty(value = "task")
+ private AudioTaskLabel task;
+
+ /*
+ * The spoken language that was detected in the transcribed audio data.
+ * This is expressed as a two-letter ISO-639-1 language code like 'en' or 'fr'.
+ */
+ @Generated
+ @JsonProperty(value = "language")
+ private String language;
+
+ /*
+ * The total duration of the audio processed to produce accompanying transcription information.
+ */
+ @Generated
+ @JsonProperty(value = "duration")
+ private Double duration;
+
+ /*
+ * A collection of information about the timing, probabilities, and other detail of each processed audio segment.
+ */
+ @Generated
+ @JsonProperty(value = "segments")
+ private List<AudioTranscriptionSegment> segments;
+
+ /**
+ * Creates an instance of AudioTranscription class.
+ *
+ * @param text the text value to set.
+ */
+ @Generated
+ @JsonCreator
+ private AudioTranscription(@JsonProperty(value = "text") String text) {
+ this.text = text;
+ }
+
+ /**
+ * Get the text property: The transcribed text for the provided audio data.
+ *
+ * @return the text value.
+ */
+ @Generated
+ public String getText() {
+ return this.text;
+ }
+
+ /**
+ * Get the task property: The label that describes which operation type generated the accompanying response data.
+ *
+ * @return the task value.
+ */
+ @Generated
+ public AudioTaskLabel getTask() {
+ return this.task;
+ }
+
+ /**
+ * Get the language property: The spoken language that was detected in the transcribed audio data. This is expressed
+ * as a two-letter ISO-639-1 language code like 'en' or 'fr'.
+ *
+ * @return the language value.
+ */
+ @Generated
+ public String getLanguage() {
+ return this.language;
+ }
+
+ /**
+ * Get the duration property: The total duration of the audio processed to produce accompanying transcription
+ * information.
+ *
+ * @return the duration value.
+ */
+ @Generated
+ public Duration getDuration() {
+ if (this.duration == null) {
+ return null;
+ }
+ return Duration.ofNanos((long) (this.duration * 1000_000_000L));
+ }
+
+ /**
+ * Get the segments property: A collection of information about the timing, probabilities, and other detail of each
+ * processed audio segment.
+ *
+ * @return the segments value.
+ */
+ @Generated
+ public List<AudioTranscriptionSegment> getSegments() {
+ return this.segments;
+ }
+}
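// Usage sketch, not part of this patch: a verbose_json payload binds directly onto
// AudioTranscription through BinaryData; the JSON literal is an illustrative assumption.
//
//     BinaryData json = BinaryData.fromString(
//             "{\"text\":\"Hello there.\",\"task\":\"transcribe\","
//                     + "\"language\":\"en\",\"duration\":1.5,\"segments\":[]}");
//     AudioTranscription transcription = json.toObject(AudioTranscription.class);
//     Duration duration = transcription.getDuration(); // 1.5 seconds exposed as a java.time.Duration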
diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionFormat.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionFormat.java
new file mode 100644
index 000000000000..8429c748e7ca
--- /dev/null
+++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionFormat.java
@@ -0,0 +1,65 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) AutoRest Code Generator.
+package com.azure.ai.openai.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.util.ExpandableStringEnum;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import java.util.Collection;
+
+/** Defines available options for the underlying response format of output transcription information. */
+public final class AudioTranscriptionFormat extends ExpandableStringEnum<AudioTranscriptionFormat> {
+
+ /** Use a response body that is a JSON object containing a single 'text' field for the transcription. */
+ @Generated public static final AudioTranscriptionFormat JSON = fromString("json");
+
+ /**
+ * Use a response body that is a JSON object containing transcription text along with timing, segments, and other
+ * metadata.
+ */
+ @Generated public static final AudioTranscriptionFormat VERBOSE_JSON = fromString("verbose_json");
+
+ /** Use a response body that is plain text containing the raw, unannotated transcription. */
+ @Generated public static final AudioTranscriptionFormat TEXT = fromString("text");
+
+ /** Use a response body that is plain text in SubRip (SRT) format that also includes timing information. */
+ @Generated public static final AudioTranscriptionFormat SRT = fromString("srt");
+
+ /**
+ * Use a response body that is plain text in Web Video Text Tracks (VTT) format that also includes timing
+ * information.
+ */
+ @Generated public static final AudioTranscriptionFormat VTT = fromString("vtt");
+
+ /**
+ * Creates a new instance of AudioTranscriptionFormat value.
+ *
+ * @deprecated Use the {@link #fromString(String)} factory method.
+ */
+ @Generated
+ @Deprecated
+ public AudioTranscriptionFormat() {}
+
+ /**
+ * Creates or finds an AudioTranscriptionFormat from its string representation.
+ *
+ * @param name a name to look for.
+ * @return the corresponding AudioTranscriptionFormat.
+ */
+ @Generated
+ @JsonCreator
+ public static AudioTranscriptionFormat fromString(String name) {
+ return fromString(name, AudioTranscriptionFormat.class);
+ }
+
+ /**
+ * Gets known AudioTranscriptionFormat values.
+ *
+ * @return known AudioTranscriptionFormat values.
+ */
+ @Generated
+ public static Collection<AudioTranscriptionFormat> values() {
+ return values(AudioTranscriptionFormat.class);
+ }
+}
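// Usage sketch, not part of this patch: per the schemas documented on the client
// methods, only json and verbose_json produce the structured response object; text,
// srt and vtt all arrive as a bare string body.
//
//     static boolean returnsStructuredJson(AudioTranscriptionFormat format) {
//         return AudioTranscriptionFormat.JSON.equals(format)
//                 || AudioTranscriptionFormat.VERBOSE_JSON.equals(format);
//     }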
diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionOptions.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionOptions.java
new file mode 100644
index 000000000000..7d72fd5ea891
--- /dev/null
+++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionOptions.java
@@ -0,0 +1,211 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) AutoRest Code Generator.
+package com.azure.ai.openai.models;
+
+import com.azure.core.annotation.Fluent;
+import com.azure.core.annotation.Generated;
+import com.azure.core.util.CoreUtils;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/** The configuration information for an audio transcription request. */
+@Fluent
+public final class AudioTranscriptionOptions {
+
+ /*
+ * The audio data to transcribe. This must be the binary content of a file in one of the supported media formats:
+ * flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm.
+ */
+ @Generated
+ @JsonProperty(value = "file")
+ private byte[] file;
+
+ /*
+ * The requested format of the transcription response data, which will influence the content and detail of the
+ * result.
+ */
+ @Generated
+ @JsonProperty(value = "response_format")
+ private AudioTranscriptionFormat responseFormat;
+
+ /*
+ * The primary spoken language of the audio data to be transcribed, supplied as a two-letter ISO-639-1 language
+ * code
+ * such as 'en' or 'fr'.
+ * Providing this known input language is optional but may improve the accuracy and/or latency of transcription.
+ */
+ @Generated
+ @JsonProperty(value = "language")
+ private String language;
+
+ /*
+ * An optional hint to guide the model's style or continue from a prior audio segment. The written language of the
+ * prompt should match the primary spoken language of the audio data.
+ */
+ @Generated
+ @JsonProperty(value = "prompt")
+ private String prompt;
+
+ /*
+ * The sampling temperature, between 0 and 1.
+ * Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused
+ * and deterministic.
+ * If set to 0, the model will use log probability to automatically increase the temperature until certain
+ * thresholds are hit.
+ */
+ @Generated
+ @JsonProperty(value = "temperature")
+ private Double temperature;
+
+ /*
+ * The model to use for this transcription request.
+ */
+ @Generated
+ @JsonProperty(value = "model")
+ private String model;
+
+ /**
+ * Creates an instance of AudioTranscriptionOptions class.
+ *
+ * @param file the file value to set.
+ */
+ @Generated
+ @JsonCreator
+ public AudioTranscriptionOptions(@JsonProperty(value = "file") byte[] file) {
+ this.file = file;
+ }
+
+ /**
+ * Get the file property: The audio data to transcribe. This must be the binary content of a file in one of the
+ * supported media formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm.
+ *
+ * @return the file value.
+ */
+ @Generated
+ public byte[] getFile() {
+ return CoreUtils.clone(this.file);
+ }
+
+ /**
+ * Get the responseFormat property: The requested format of the transcription response data, which will influence
+ * the content and detail of the result.
+ *
+ * @return the responseFormat value.
+ */
+ @Generated
+ public AudioTranscriptionFormat getResponseFormat() {
+ return this.responseFormat;
+ }
+
+ /**
+ * Set the responseFormat property: The requested format of the transcription response data, which will influence
+ * the content and detail of the result.
+ *
+ * @param responseFormat the responseFormat value to set.
+ * @return the AudioTranscriptionOptions object itself.
+ */
+ @Generated
+ public AudioTranscriptionOptions setResponseFormat(AudioTranscriptionFormat responseFormat) {
+ this.responseFormat = responseFormat;
+ return this;
+ }
+
+ /**
+ * Get the language property: The primary spoken language of the audio data to be transcribed, supplied as a
+ * two-letter ISO-639-1 language code such as 'en' or 'fr'. Providing this known input language is optional but may
+ * improve the accuracy and/or latency of transcription.
+ *
+ * @return the language value.
+ */
+ @Generated
+ public String getLanguage() {
+ return this.language;
+ }
+
+ /**
+ * Set the language property: The primary spoken language of the audio data to be transcribed, supplied as a
+ * two-letter ISO-639-1 language code such as 'en' or 'fr'. Providing this known input language is optional but may
+ * improve the accuracy and/or latency of transcription.
+ *
+ * @param language the language value to set.
+ * @return the AudioTranscriptionOptions object itself.
+ */
+ @Generated
+ public AudioTranscriptionOptions setLanguage(String language) {
+ this.language = language;
+ return this;
+ }
+
+ /**
+ * Get the prompt property: An optional hint to guide the model's style or continue from a prior audio segment. The
+ * written language of the prompt should match the primary spoken language of the audio data.
+ *
+ * @return the prompt value.
+ */
+ @Generated
+ public String getPrompt() {
+ return this.prompt;
+ }
+
+ /**
+ * Set the prompt property: An optional hint to guide the model's style or continue from a prior audio segment. The
+ * written language of the prompt should match the primary spoken language of the audio data.
+ *
+ * @param prompt the prompt value to set.
+ * @return the AudioTranscriptionOptions object itself.
+ */
+ @Generated
+ public AudioTranscriptionOptions setPrompt(String prompt) {
+ this.prompt = prompt;
+ return this;
+ }
+
+ /**
+ * Get the temperature property: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+ * output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the
+ * model will use log probability to automatically increase the temperature until certain thresholds are hit.
+ *
+ * @return the temperature value.
+ */
+ @Generated
+ public Double getTemperature() {
+ return this.temperature;
+ }
+
+ /**
+ * Set the temperature property: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+ * output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the
+ * model will use log probability to automatically increase the temperature until certain thresholds are hit.
+ *
+ * @param temperature the temperature value to set.
+ * @return the AudioTranscriptionOptions object itself.
+ */
+ @Generated
+ public AudioTranscriptionOptions setTemperature(Double temperature) {
+ this.temperature = temperature;
+ return this;
+ }
+
+ /**
+ * Get the model property: The model to use for this transcription request.
+ *
+ * @return the model value.
+ */
+ @Generated
+ public String getModel() {
+ return this.model;
+ }
+
+ /**
+ * Set the model property: The model to use for this transcription request.
+ *
+ * @param model the model value to set.
+ * @return the AudioTranscriptionOptions object itself.
+ */
+ @Generated
+ public AudioTranscriptionOptions setModel(String model) {
+ this.model = model;
+ return this;
+ }
+}
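A minimal usage sketch for the options class above (illustrative only, not part of this patch; the file path and the "en" language hint are placeholder values) showing how the fluent setters compose:

    import com.azure.ai.openai.models.AudioTranscriptionFormat;
    import com.azure.ai.openai.models.AudioTranscriptionOptions;
    import com.azure.core.util.BinaryData;
    import java.nio.file.Paths;

    // Load the raw audio bytes and configure the request fluently.
    byte[] audioBytes = BinaryData.fromFile(Paths.get("speech.wav")).toBytes(); // placeholder path
    AudioTranscriptionOptions options = new AudioTranscriptionOptions(audioBytes)
            .setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON) // segment-level detail
            .setLanguage("en")     // optional ISO-639-1 hint; may improve accuracy and latency
            .setTemperature(0.0);  // 0 lets the model raise the temperature automatically as needed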
diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionSegment.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionSegment.java
new file mode 100644
index 000000000000..87e289da3b0e
--- /dev/null
+++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranscriptionSegment.java
@@ -0,0 +1,262 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) AutoRest Code Generator.
+package com.azure.ai.openai.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import java.time.Duration;
+import java.util.List;
+
+/**
+ * Extended information about a single segment of transcribed audio data. Segments generally represent roughly 5-10
+ * seconds of speech. Segment boundaries typically occur between words but not necessarily sentences.
+ */
+@Immutable
+public final class AudioTranscriptionSegment {
+
+ /*
+ * The 0-based index of this segment within a transcription.
+ */
+ @Generated
+ @JsonProperty(value = "id")
+ private int id;
+
+ /*
+ * The time at which this segment started relative to the beginning of the transcribed audio.
+ */
+ @Generated
+ @JsonProperty(value = "start")
+ private double start;
+
+ /*
+ * The time at which this segment ended relative to the beginning of the transcribed audio.
+ */
+ @Generated
+ @JsonProperty(value = "end")
+ private double end;
+
+ /*
+ * The transcribed text that was part of this audio segment.
+ */
+ @Generated
+ @JsonProperty(value = "text")
+ private String text;
+
+ /*
+ * The temperature score associated with this audio segment.
+ */
+ @Generated
+ @JsonProperty(value = "temperature")
+ private double temperature;
+
+ /*
+ * The average log probability associated with this audio segment.
+ */
+ @Generated
+ @JsonProperty(value = "avg_logprob")
+ private double avgLogprob;
+
+ /*
+ * The compression ratio of this audio segment.
+ */
+ @Generated
+ @JsonProperty(value = "compression_ratio")
+ private double compressionRatio;
+
+ /*
+ * The probability of no speech detection within this audio segment.
+ */
+ @Generated
+ @JsonProperty(value = "no_speech_prob")
+ private double noSpeechProb;
+
+ /*
+ * The token IDs matching the transcribed text in this audio segment.
+ */
+ @Generated
+ @JsonProperty(value = "tokens")
+    private List<Integer> tokens;
+
+ /*
+ * The seek position associated with the processing of this audio segment.
+ * Seek positions are expressed as hundredths of seconds.
+ * The model may process several segments from a single seek position, so while the seek position will never
+ * represent
+ * a later time than the segment's start, the segment's start may represent a significantly later time than the
+ * segment's associated seek position.
+ */
+ @Generated
+ @JsonProperty(value = "seek")
+ private int seek;
+
+ /**
+ * Creates an instance of AudioTranscriptionSegment class.
+ *
+ * @param id the id value to set.
+ * @param start the start value to set.
+ * @param end the end value to set.
+ * @param text the text value to set.
+ * @param temperature the temperature value to set.
+ * @param avgLogprob the avgLogprob value to set.
+ * @param compressionRatio the compressionRatio value to set.
+ * @param noSpeechProb the noSpeechProb value to set.
+ * @param tokens the tokens value to set.
+ * @param seek the seek value to set.
+ */
+ @Generated
+ private AudioTranscriptionSegment(
+ int id,
+ Duration start,
+ Duration end,
+ String text,
+ double temperature,
+ double avgLogprob,
+ double compressionRatio,
+ double noSpeechProb,
+            List<Integer> tokens,
+ int seek) {
+ this.id = id;
+        this.start = (double) start.toNanos() / 1_000_000_000L;
+        this.end = (double) end.toNanos() / 1_000_000_000L;
+ this.text = text;
+ this.temperature = temperature;
+ this.avgLogprob = avgLogprob;
+ this.compressionRatio = compressionRatio;
+ this.noSpeechProb = noSpeechProb;
+ this.tokens = tokens;
+ this.seek = seek;
+ }
+
+ @Generated
+ @JsonCreator
+ private AudioTranscriptionSegment(
+ @JsonProperty(value = "id") int id,
+ @JsonProperty(value = "start") double start,
+ @JsonProperty(value = "end") double end,
+ @JsonProperty(value = "text") String text,
+ @JsonProperty(value = "temperature") double temperature,
+ @JsonProperty(value = "avg_logprob") double avgLogprob,
+ @JsonProperty(value = "compression_ratio") double compressionRatio,
+ @JsonProperty(value = "no_speech_prob") double noSpeechProb,
+ @JsonProperty(value = "tokens") List tokens,
+ @JsonProperty(value = "seek") int seek) {
+ this(
+ id,
+                Duration.ofNanos((long) (start * 1_000_000_000L)),
+                Duration.ofNanos((long) (end * 1_000_000_000L)),
+ text,
+ temperature,
+ avgLogprob,
+ compressionRatio,
+ noSpeechProb,
+ tokens,
+ seek);
+ }
+
+ /**
+ * Get the id property: The 0-based index of this segment within a transcription.
+ *
+ * @return the id value.
+ */
+ @Generated
+ public int getId() {
+ return this.id;
+ }
+
+ /**
+ * Get the start property: The time at which this segment started relative to the beginning of the transcribed
+ * audio.
+ *
+ * @return the start value.
+ */
+ @Generated
+ public Duration getStart() {
+        return Duration.ofNanos((long) (this.start * 1_000_000_000L));
+ }
+
+ /**
+ * Get the end property: The time at which this segment ended relative to the beginning of the transcribed audio.
+ *
+ * @return the end value.
+ */
+ @Generated
+ public Duration getEnd() {
+        return Duration.ofNanos((long) (this.end * 1_000_000_000L));
+ }
+
+ /**
+ * Get the text property: The transcribed text that was part of this audio segment.
+ *
+ * @return the text value.
+ */
+ @Generated
+ public String getText() {
+ return this.text;
+ }
+
+ /**
+ * Get the temperature property: The temperature score associated with this audio segment.
+ *
+ * @return the temperature value.
+ */
+ @Generated
+ public double getTemperature() {
+ return this.temperature;
+ }
+
+ /**
+ * Get the avgLogprob property: The average log probability associated with this audio segment.
+ *
+ * @return the avgLogprob value.
+ */
+ @Generated
+ public double getAvgLogprob() {
+ return this.avgLogprob;
+ }
+
+ /**
+ * Get the compressionRatio property: The compression ratio of this audio segment.
+ *
+ * @return the compressionRatio value.
+ */
+ @Generated
+ public double getCompressionRatio() {
+ return this.compressionRatio;
+ }
+
+ /**
+ * Get the noSpeechProb property: The probability of no speech detection within this audio segment.
+ *
+ * @return the noSpeechProb value.
+ */
+ @Generated
+ public double getNoSpeechProb() {
+ return this.noSpeechProb;
+ }
+
+ /**
+ * Get the tokens property: The token IDs matching the transcribed text in this audio segment.
+ *
+ * @return the tokens value.
+ */
+ @Generated
+    public List<Integer> getTokens() {
+ return this.tokens;
+ }
+
+ /**
+ * Get the seek property: The seek position associated with the processing of this audio segment. Seek positions are
+ * expressed as hundredths of seconds. The model may process several segments from a single seek position, so while
+ * the seek position will never represent a later time than the segment's start, the segment's start may represent a
+ * significantly later time than the segment's associated seek position.
+ *
+ * @return the seek value.
+ */
+ @Generated
+ public int getSeek() {
+ return this.seek;
+ }
+}
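Because the segment stores start/end as seconds but exposes them as java.time.Duration, while seek positions are expressed in hundredths of seconds, the relationship between the two is easy to get wrong. A small helper sketch (illustrative only, built on the accessors introduced above):

    import com.azure.ai.openai.models.AudioTranscriptionSegment;
    import java.time.Duration;

    static void printSegmentTiming(AudioTranscriptionSegment segment) {
        // One seek unit is a hundredth of a second, i.e. 10 milliseconds.
        Duration seek = Duration.ofMillis(segment.getSeek() * 10L);
        // Per the Javadoc above, the seek position never falls after the segment's start.
        System.out.printf("segment %d: seek=%s, start=%s, end=%s, text=%s%n",
                segment.getId(), seek, segment.getStart(), segment.getEnd(), segment.getText());
    }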
diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationOptions.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationOptions.java
new file mode 100644
index 000000000000..65f7b1f873ad
--- /dev/null
+++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationOptions.java
@@ -0,0 +1,175 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) AutoRest Code Generator.
+package com.azure.ai.openai.models;
+
+import com.azure.core.annotation.Fluent;
+import com.azure.core.annotation.Generated;
+import com.azure.core.util.CoreUtils;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/** The configuration information for an audio translation request. */
+@Fluent
+public final class AudioTranslationOptions {
+
+ /*
+     * The audio data to translate. This must be the binary content of a file in one of the supported media formats:
+ * flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm.
+ */
+ @Generated
+ @JsonProperty(value = "file")
+ private byte[] file;
+
+ /*
+     * The requested format of the translation response data, which will influence the content and detail of the
+ * result.
+ */
+ @Generated
+ @JsonProperty(value = "response_format")
+ private AudioTranscriptionFormat responseFormat;
+
+ /*
+ * An optional hint to guide the model's style or continue from a prior audio segment. The written language of the
+ * prompt should match the primary spoken language of the audio data.
+ */
+ @Generated
+ @JsonProperty(value = "prompt")
+ private String prompt;
+
+ /*
+ * The sampling temperature, between 0 and 1.
+ * Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused
+ * and deterministic.
+ * If set to 0, the model will use log probability to automatically increase the temperature until certain
+ * thresholds are hit.
+ */
+ @Generated
+ @JsonProperty(value = "temperature")
+ private Double temperature;
+
+ /*
+     * The model to use for this translation request.
+ */
+ @Generated
+ @JsonProperty(value = "model")
+ private String model;
+
+ /**
+ * Creates an instance of AudioTranslationOptions class.
+ *
+ * @param file the file value to set.
+ */
+ @Generated
+ @JsonCreator
+ public AudioTranslationOptions(@JsonProperty(value = "file") byte[] file) {
+ this.file = file;
+ }
+
+ /**
+     * Get the file property: The audio data to translate. This must be the binary content of a file in one of the
+ * supported media formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm.
+ *
+ * @return the file value.
+ */
+ @Generated
+ public byte[] getFile() {
+ return CoreUtils.clone(this.file);
+ }
+
+ /**
+     * Get the responseFormat property: The requested format of the translation response data, which will influence
+ * the content and detail of the result.
+ *
+ * @return the responseFormat value.
+ */
+ @Generated
+ public AudioTranscriptionFormat getResponseFormat() {
+ return this.responseFormat;
+ }
+
+ /**
+     * Set the responseFormat property: The requested format of the translation response data, which will influence
+ * the content and detail of the result.
+ *
+ * @param responseFormat the responseFormat value to set.
+ * @return the AudioTranslationOptions object itself.
+ */
+ @Generated
+ public AudioTranslationOptions setResponseFormat(AudioTranscriptionFormat responseFormat) {
+ this.responseFormat = responseFormat;
+ return this;
+ }
+
+ /**
+ * Get the prompt property: An optional hint to guide the model's style or continue from a prior audio segment. The
+ * written language of the prompt should match the primary spoken language of the audio data.
+ *
+ * @return the prompt value.
+ */
+ @Generated
+ public String getPrompt() {
+ return this.prompt;
+ }
+
+ /**
+ * Set the prompt property: An optional hint to guide the model's style or continue from a prior audio segment. The
+ * written language of the prompt should match the primary spoken language of the audio data.
+ *
+ * @param prompt the prompt value to set.
+ * @return the AudioTranslationOptions object itself.
+ */
+ @Generated
+ public AudioTranslationOptions setPrompt(String prompt) {
+ this.prompt = prompt;
+ return this;
+ }
+
+ /**
+ * Get the temperature property: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+ * output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the
+ * model will use log probability to automatically increase the temperature until certain thresholds are hit.
+ *
+ * @return the temperature value.
+ */
+ @Generated
+ public Double getTemperature() {
+ return this.temperature;
+ }
+
+ /**
+ * Set the temperature property: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+ * output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the
+ * model will use log probability to automatically increase the temperature until certain thresholds are hit.
+ *
+ * @param temperature the temperature value to set.
+ * @return the AudioTranslationOptions object itself.
+ */
+ @Generated
+ public AudioTranslationOptions setTemperature(Double temperature) {
+ this.temperature = temperature;
+ return this;
+ }
+
+ /**
+     * Get the model property: The model to use for this translation request.
+ *
+ * @return the model value.
+ */
+ @Generated
+ public String getModel() {
+ return this.model;
+ }
+
+ /**
+     * Set the model property: The model to use for this translation request.
+ *
+ * @param model the model value to set.
+ * @return the AudioTranslationOptions object itself.
+ */
+ @Generated
+ public AudioTranslationOptions setModel(String model) {
+ this.model = model;
+ return this;
+ }
+}
diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/ContentFilterResults.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/ContentFilterResults.java
index 65883af4465f..2c1c3c668bd3 100644
--- a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/ContentFilterResults.java
+++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/ContentFilterResults.java
@@ -5,6 +5,7 @@
import com.azure.core.annotation.Generated;
import com.azure.core.annotation.Immutable;
+import com.azure.core.models.ResponseError;
import com.fasterxml.jackson.annotation.JsonProperty;
/** Information about the content filtering category, if it has been detected. */
@@ -98,4 +99,23 @@ public ContentFilterResult getSelfHarm() {
/** Creates an instance of ContentFilterResults class. */
@Generated
private ContentFilterResults() {}
+
+ /*
+ * Describes an error returned if the content filtering system is
+ * down or otherwise unable to complete the operation in time.
+ */
+ @Generated
+ @JsonProperty(value = "error")
+ private ResponseError error;
+
+ /**
+ * Get the error property: Describes an error returned if the content filtering system is down or otherwise unable
+ * to complete the operation in time.
+ *
+ * @return the error value.
+ */
+ @Generated
+ public ResponseError getError() {
+ return this.error;
+ }
}
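A short defensive sketch for the new property (illustrative only; it assumes the existing ContentFilterResult accessor isFiltered()): a non-null error means the per-category results should not be read.

    import com.azure.ai.openai.models.ContentFilterResults;

    static void inspectFilterResults(ContentFilterResults results) {
        if (results.getError() != null) {
            // The filtering system was down or timed out; category results are not meaningful.
            System.err.println("Content filtering unavailable: " + results.getError().getMessage());
            return;
        }
        System.out.println("Self-harm content filtered: " + results.getSelfHarm().isFiltered());
    }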
diff --git a/sdk/openai/azure-ai-openai/src/main/java/module-info.java b/sdk/openai/azure-ai-openai/src/main/java/module-info.java
index 016c2a1fc8be..c8eafa553ff1 100644
--- a/sdk/openai/azure-ai-openai/src/main/java/module-info.java
+++ b/sdk/openai/azure-ai-openai/src/main/java/module-info.java
@@ -7,7 +7,6 @@
exports com.azure.ai.openai;
exports com.azure.ai.openai.models;
- exports com.azure.ai.openai.implementation.models;
opens com.azure.ai.openai.models to
com.azure.core,
diff --git a/sdk/openai/azure-ai-openai/src/samples/README.md b/sdk/openai/azure-ai-openai/src/samples/README.md
index cf37cf05b527..fa5a898c27eb 100644
--- a/sdk/openai/azure-ai-openai/src/samples/README.md
+++ b/sdk/openai/azure-ai-openai/src/samples/README.md
@@ -28,12 +28,16 @@ Synchronous:
- [Chat Completions][sample_get_chat_completions]
- [Embeddings][sample_get_embedding]
- [Image Generation][sample_image_generation]
+- [Audio Transcription][sample_audio_transcription]
+- [Audio Translation][sample_audio_translation]
Asynchronous:
- [Text Completions][async_sample_get_completions]
- [Chat Completions][async_sample_get_chat_completions]
- [Embeddings][async_sample_get_embedding]
- [Image Generation][async_sample_image_generation]
+- [Audio Transcription][async_sample_audio_transcription]
+- [Audio Translation][async_sample_audio_translation]
Cookbook:
- [Chat bot][cookbook_chat_bot]
@@ -66,11 +70,15 @@ This project welcomes contributions and suggestions. Find [more contributing][SD
[async_sample_get_chat_completions]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetChatCompletionsAsyncSample.java
[async_sample_get_embedding]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetEmbeddingsAsyncSample.java
[async_sample_image_generation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetImagesAsyncSample.java
+[async_sample_audio_transcription]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionAsyncSample.java
+[async_sample_audio_translation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationAsyncSample.java
[sample_get_completions]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetCompletionsSample.java
[sample_get_chat_completions]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetChatCompletionsSample.java
[sample_get_embedding]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetEmbeddingsSample.java
[sample_image_generation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/GetImagesSample.java
+[sample_audio_transcription]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java
+[sample_audio_translation]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java
[cookbook_chat_bot]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatbotSample.java
[cookbook_chat_bot_with_key]: https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatbotWithKeySample.java
diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatCompletionsWithYourData.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatCompletionsWithYourData.java
index 0d732704c90c..fecaa9dccf77 100644
--- a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatCompletionsWithYourData.java
+++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/ChatCompletionsWithYourData.java
@@ -29,7 +29,7 @@ public class ChatCompletionsWithYourData {
*
* @param args Unused. Arguments to the program.
*/
- public static void main(String[] args){
+ public static void main(String[] args) {
String azureOpenaiKey = "{azure-open-ai-key}";
String endpoint = "{azure-open-ai-endpoint}";
String deploymentOrModelId = "{azure-open-ai-deployment-model-id}";
diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/impl/ReadmeSamples.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/impl/ReadmeSamples.java
index 3384e3cb3e2f..7488e04c3271 100644
--- a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/impl/ReadmeSamples.java
+++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/impl/ReadmeSamples.java
@@ -6,6 +6,10 @@
import com.azure.ai.openai.OpenAIAsyncClient;
import com.azure.ai.openai.OpenAIClient;
import com.azure.ai.openai.OpenAIClientBuilder;
+import com.azure.ai.openai.models.AudioTranscription;
+import com.azure.ai.openai.models.AudioTranscriptionFormat;
+import com.azure.ai.openai.models.AudioTranscriptionOptions;
+import com.azure.ai.openai.models.AudioTranslationOptions;
import com.azure.ai.openai.models.ChatChoice;
import com.azure.ai.openai.models.ChatCompletions;
import com.azure.ai.openai.models.ChatCompletionsOptions;
@@ -25,11 +29,14 @@
import com.azure.core.credential.TokenCredential;
import com.azure.core.http.ProxyOptions;
import com.azure.core.models.ResponseError;
+import com.azure.core.util.BinaryData;
import com.azure.core.util.HttpClientOptions;
import com.azure.core.util.IterableStream;
import com.azure.identity.DefaultAzureCredentialBuilder;
import java.net.InetSocketAddress;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -221,4 +228,34 @@ public void imageGeneration() {
}
// END: readme-sample-imageGeneration
}
+
+ public void audioTranscription() {
+ // BEGIN: readme-sample-audioTranscription
+ String fileName = "{your-file-name}";
+ Path filePath = Paths.get("{your-file-path}" + fileName);
+
+ byte[] file = BinaryData.fromFile(filePath).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file)
+ .setResponseFormat(AudioTranscriptionFormat.JSON);
+
+ AudioTranscription transcription = client.getAudioTranscription("{deploymentOrModelId}", fileName, transcriptionOptions);
+
+ System.out.println("Transcription: " + transcription.getText());
+ // END: readme-sample-audioTranscription
+ }
+
+ public void audioTranslation() {
+ // BEGIN: readme-sample-audioTranslation
+ String fileName = "{your-file-name}";
+ Path filePath = Paths.get("{your-file-path}" + fileName);
+
+ byte[] file = BinaryData.fromFile(filePath).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file)
+ .setResponseFormat(AudioTranscriptionFormat.JSON);
+
+ AudioTranscription translation = client.getAudioTranslation("{deploymentOrModelId}", fileName, translationOptions);
+
+ System.out.println("Translation: " + translation.getText());
+ // END: readme-sample-audioTranslation
+ }
}
diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/JP_it_is_rainy_today.wav b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/JP_it_is_rainy_today.wav
new file mode 100644
index 000000000000..5970c85ec1cd
Binary files /dev/null and b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/JP_it_is_rainy_today.wav differ
diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/batman.wav b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/batman.wav
new file mode 100644
index 000000000000..4c0b7248a39c
Binary files /dev/null and b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/resources/batman.wav differ
diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionAsyncSample.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionAsyncSample.java
new file mode 100644
index 000000000000..fbebd49b5965
--- /dev/null
+++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionAsyncSample.java
@@ -0,0 +1,52 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.openai.usage;
+
+import com.azure.ai.openai.OpenAIAsyncClient;
+import com.azure.ai.openai.OpenAIClientBuilder;
+import com.azure.ai.openai.models.AudioTranscriptionFormat;
+import com.azure.ai.openai.models.AudioTranscriptionOptions;
+import com.azure.core.credential.AzureKeyCredential;
+import com.azure.core.util.BinaryData;
+
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * An asynchronous sample demonstrating how to transcribe a given audio file.
+ */
+public class AudioTranscriptionAsyncSample {
+ /**
+     * Runs the sample algorithm and demonstrates how to transcribe a given audio file.
+ *
+ * @param args Unused. Arguments to the program.
+ */
+ public static void main(String[] args) throws InterruptedException {
+ String azureOpenaiKey = "{azure-open-ai-key}";
+ String endpoint = "{azure-open-ai-endpoint}";
+ String deploymentOrModelId = "{azure-open-ai-deployment-model-id}";
+ String fileName = "batman.wav";
+ Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName);
+
+ OpenAIAsyncClient client = new OpenAIClientBuilder()
+ .endpoint(endpoint)
+ .credential(new AzureKeyCredential(azureOpenaiKey))
+ .buildAsyncClient();
+
+ byte[] file = BinaryData.fromFile(filePath).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file)
+ .setResponseFormat(AudioTranscriptionFormat.JSON);
+
+ client.getAudioTranscription(deploymentOrModelId, fileName, transcriptionOptions)
+ .subscribe(transcription -> {
+ System.out.println("Transcription: " + transcription.getText());
+ });
+
+        // .subscribe() is not a blocking call. For the purpose of this example, we sleep the thread so the program
+        // does not end before the transcription completes. Using .block() instead of .subscribe() turns this into a
+        // synchronous call.
+ TimeUnit.SECONDS.sleep(10);
+ }
+}
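For reference, the blocking variant mentioned in the comment above looks like the following sketch (illustrative only; it reuses the sample's variables and assumes an extra import of com.azure.ai.openai.models.AudioTranscription):

    // Synchronous alternative: block() waits for the Mono to complete on the calling thread,
    // so no sleep is needed.
    AudioTranscription transcription = client
            .getAudioTranscription(deploymentOrModelId, fileName, transcriptionOptions)
            .block();
    System.out.println("Transcription: " + transcription.getText());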
diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java
new file mode 100644
index 000000000000..e16238116533
--- /dev/null
+++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranscriptionSample.java
@@ -0,0 +1,46 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.openai.usage;
+
+import com.azure.ai.openai.OpenAIClient;
+import com.azure.ai.openai.OpenAIClientBuilder;
+import com.azure.ai.openai.models.AudioTranscription;
+import com.azure.ai.openai.models.AudioTranscriptionFormat;
+import com.azure.ai.openai.models.AudioTranscriptionOptions;
+import com.azure.core.credential.AzureKeyCredential;
+import com.azure.core.util.BinaryData;
+
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+/**
+ * A sample demonstrating how to transcribe a given audio file.
+ */
+public class AudioTranscriptionSample {
+ /**
+     * Runs the sample algorithm and demonstrates how to transcribe a given audio file.
+ *
+ * @param args Unused. Arguments to the program.
+ */
+ public static void main(String[] args) {
+ String azureOpenaiKey = "{azure-open-ai-key}";
+ String endpoint = "{azure-open-ai-endpoint}";
+ String deploymentOrModelId = "{azure-open-ai-deployment-model-id}";
+ String fileName = "batman.wav";
+ Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName);
+
+ OpenAIClient client = new OpenAIClientBuilder()
+ .endpoint(endpoint)
+ .credential(new AzureKeyCredential(azureOpenaiKey))
+ .buildClient();
+
+ byte[] file = BinaryData.fromFile(filePath).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file)
+ .setResponseFormat(AudioTranscriptionFormat.JSON);
+
+ AudioTranscription transcription = client.getAudioTranscription(deploymentOrModelId, fileName, transcriptionOptions);
+
+ System.out.println("Transcription: " + transcription.getText());
+ }
+}
diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationAsyncSample.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationAsyncSample.java
new file mode 100644
index 000000000000..4ba19ad37b7f
--- /dev/null
+++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationAsyncSample.java
@@ -0,0 +1,51 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.openai.usage;
+
+import com.azure.ai.openai.OpenAIAsyncClient;
+import com.azure.ai.openai.OpenAIClientBuilder;
+import com.azure.ai.openai.models.AudioTranscriptionFormat;
+import com.azure.ai.openai.models.AudioTranslationOptions;
+import com.azure.core.credential.AzureKeyCredential;
+import com.azure.core.util.BinaryData;
+
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * An asynchronous sample demonstrating how to translate a given audio file.
+ */
+public class AudioTranslationAsyncSample {
+ /**
+ * Runs the sample algorithm and demonstrates how to translate a given audio file.
+ *
+ * @param args Unused. Arguments to the program.
+ */
+ public static void main(String[] args) throws InterruptedException {
+ String azureOpenaiKey = "{azure-open-ai-key}";
+ String endpoint = "{azure-open-ai-endpoint}";
+ String deploymentOrModelId = "{azure-open-ai-deployment-model-id}";
+ String fileName = "JP_it_is_rainy_today.wav";
+ Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName);
+
+ OpenAIAsyncClient client = new OpenAIClientBuilder()
+ .endpoint(endpoint)
+ .credential(new AzureKeyCredential(azureOpenaiKey))
+ .buildAsyncClient();
+ byte[] file = BinaryData.fromFile(filePath).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file)
+ .setResponseFormat(AudioTranscriptionFormat.JSON);
+
+ client.getAudioTranslation(deploymentOrModelId, fileName, translationOptions)
+ .subscribe(translation -> {
+ System.out.println("Translation: " + translation.getText());
+ });
+
+        // .subscribe() is not a blocking call. For the purpose of this example, we sleep the thread so the program
+        // does not end before the translation completes. Using .block() instead of .subscribe() turns this into a
+        // synchronous call.
+ TimeUnit.SECONDS.sleep(10);
+ }
+}
diff --git a/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java
new file mode 100644
index 000000000000..18a56d967fef
--- /dev/null
+++ b/sdk/openai/azure-ai-openai/src/samples/java/com/azure/ai/openai/usage/AudioTranslationSample.java
@@ -0,0 +1,45 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.openai.usage;
+
+import com.azure.ai.openai.OpenAIClient;
+import com.azure.ai.openai.OpenAIClientBuilder;
+import com.azure.ai.openai.models.AudioTranscription;
+import com.azure.ai.openai.models.AudioTranscriptionFormat;
+import com.azure.ai.openai.models.AudioTranslationOptions;
+import com.azure.core.credential.AzureKeyCredential;
+import com.azure.core.util.BinaryData;
+
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+/**
+ * A sample demonstrating how to translate a given audio file.
+ */
+public class AudioTranslationSample {
+ /**
+ * Runs the sample algorithm and demonstrates how to translate a given audio file.
+ *
+ * @param args Unused. Arguments to the program.
+ */
+ public static void main(String[] args) {
+ String azureOpenaiKey = "{azure-open-ai-key}";
+ String endpoint = "{azure-open-ai-endpoint}";
+ String deploymentOrModelId = "{azure-open-ai-deployment-model-id}";
+ String fileName = "JP_it_is_rainy_today.wav";
+ Path filePath = Paths.get("src/samples/java/com/azure/ai/openai/resources/" + fileName);
+
+ OpenAIClient client = new OpenAIClientBuilder()
+ .endpoint(endpoint)
+ .credential(new AzureKeyCredential(azureOpenaiKey))
+ .buildClient();
+ byte[] file = BinaryData.fromFile(filePath).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file)
+ .setResponseFormat(AudioTranscriptionFormat.JSON);
+
+ AudioTranscription translation = client.getAudioTranslation(deploymentOrModelId, fileName, translationOptions);
+
+ System.out.println("Translation: " + translation.getText());
+ }
+}
diff --git a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAIAsyncClientTest.java b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAIAsyncClientTest.java
index 7cc7ec3429c6..fb842b09df8a 100644
--- a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAIAsyncClientTest.java
+++ b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAIAsyncClientTest.java
@@ -4,6 +4,10 @@
package com.azure.ai.openai;
import com.azure.ai.openai.functions.MyFunctionCallArguments;
+import com.azure.ai.openai.models.AudioTaskLabel;
+import com.azure.ai.openai.models.AudioTranscriptionFormat;
+import com.azure.ai.openai.models.AudioTranscriptionOptions;
+import com.azure.ai.openai.models.AudioTranslationOptions;
import com.azure.ai.openai.models.ChatChoice;
import com.azure.ai.openai.models.ChatCompletions;
import com.azure.ai.openai.models.ChatCompletionsOptions;
@@ -25,6 +29,7 @@
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.List;
import static com.azure.ai.openai.TestUtils.DISPLAY_NAME_WITH_ARGUMENTS;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -322,4 +327,281 @@ public void testCompletionContentFiltering(HttpClient httpClient, OpenAIServiceV
}).verifyComplete();
});
}
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAIAsyncClient(httpClient);
+
+ getAudioTranscriptionRunnerForNonAzure((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.JSON);
+
+ StepVerifier.create(client.getAudioTranscription(deploymentName, fileName, transcriptionOptions))
+ .assertNext(transcription ->
+ assertAudioTranscriptionSimpleJson(transcription, BATMAN_TRANSCRIPTION))
+ .verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAIAsyncClient(httpClient);
+
+ getAudioTranscriptionRunnerForNonAzure((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON);
+
+ StepVerifier.create(client.getAudioTranscription(deploymentName, fileName, transcriptionOptions))
+ .assertNext(transcription ->
+ assertAudioTranscriptionVerboseJson(transcription, BATMAN_TRANSCRIPTION, AudioTaskLabel.TRANSCRIBE))
+ .verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAIAsyncClient(httpClient);
+
+ getAudioTranscriptionRunnerForNonAzure((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.TEXT);
+
+ StepVerifier.create(client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions))
+ .assertNext(transcription ->
+                        // A text/plain request adds a line break as an artifact. Also observed for translations
+ assertEquals(BATMAN_TRANSCRIPTION + "\n", transcription))
+ .verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAIAsyncClient(httpClient);
+
+ getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.SRT);
+
+ StepVerifier.create(client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions))
+                .assertNext(transcription -> {
+                    // Sequence number
+                    assertTrue(transcription.contains("1\n"));
+                    // First sequence starts at timestamp 0
+                    assertTrue(transcription.contains("00:00:00,000 --> "));
+                    // Contains at least one expected word
+                    assertTrue(transcription.contains("Batman"));
+ }).verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAIAsyncClient(httpClient);
+
+ getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VTT);
+
+ StepVerifier.create(client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions))
+                .assertNext(transcription -> {
+                    // Start value according to spec
+                    assertTrue(transcription.startsWith("WEBVTT\n"));
+                    // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "."
+                    assertTrue(transcription.contains("00:00:00.000 --> "));
+                    // Contains at least one expected word
+                    assertTrue(transcription.contains("Batman"));
+ }).verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAIAsyncClient(httpClient);
+        List<AudioTranscriptionFormat> wrongFormats = Arrays.asList(
+ AudioTranscriptionFormat.JSON,
+ AudioTranscriptionFormat.VERBOSE_JSON
+ );
+
+ getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+
+ for (AudioTranscriptionFormat format: wrongFormats) {
+ transcriptionOptions.setResponseFormat(format);
+ StepVerifier.create(client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions))
+ .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException));
+ }
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAIAsyncClient(httpClient);
+        List<AudioTranscriptionFormat> wrongFormats = Arrays.asList(
+ AudioTranscriptionFormat.TEXT,
+ AudioTranscriptionFormat.SRT,
+ AudioTranscriptionFormat.VTT
+ );
+
+ getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+
+ for (AudioTranscriptionFormat format: wrongFormats) {
+ transcriptionOptions.setResponseFormat(format);
+ StepVerifier.create(client.getAudioTranscription(modelId, fileName, transcriptionOptions))
+ .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException));
+ }
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAIAsyncClient(httpClient);
+
+ getAudioTranslationRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.JSON);
+
+ StepVerifier.create(client.getAudioTranslation(modelId, fileName, translationOptions))
+ .assertNext(translation ->
+ assertAudioTranscriptionSimpleJson(translation, "It's raining today."))
+ .verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAIAsyncClient(httpClient);
+
+ getAudioTranslationRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON);
+
+ StepVerifier.create(client.getAudioTranslation(modelId, fileName, translationOptions))
+ .assertNext(translation ->
+ assertAudioTranscriptionVerboseJson(translation, "It's raining today.", AudioTaskLabel.TRANSLATE))
+ .verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAIAsyncClient(httpClient);
+
+ getAudioTranslationRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.TEXT);
+
+ StepVerifier.create(client.getAudioTranslationText(modelId, fileName, translationOptions))
+ .assertNext(translation -> {
+ assertEquals("It's raining today.\n", translation);
+ }).verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAIAsyncClient(httpClient);
+
+ getAudioTranslationRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.SRT);
+
+ StepVerifier.create(client.getAudioTranslationText(modelId, fileName, translationOptions))
+ .assertNext(translation -> {
+ // Sequence number
+ assertTrue(translation.contains("1\n"));
+ // First sequence starts at timestamp 0
+ assertTrue(translation.contains("00:00:00,000 --> "));
+ // Actual translation value
+ assertTrue(translation.contains("It's raining today."));
+ }).verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAIAsyncClient(httpClient);
+
+ getAudioTranslationRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.VTT);
+
+ StepVerifier.create(client.getAudioTranslationText(modelId, fileName, translationOptions))
+ .assertNext(translation -> {
+ // Start value according to spec
+ assertTrue(translation.startsWith("WEBVTT\n"));
+ // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "."
+ assertTrue(translation.contains("00:00:00.000 --> "));
+ // Actual translation value
+ assertTrue(translation.contains("It's raining today."));
+ }).verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAIAsyncClient(httpClient);
+        List<AudioTranscriptionFormat> wrongFormats = Arrays.asList(
+ AudioTranscriptionFormat.JSON,
+ AudioTranscriptionFormat.VERBOSE_JSON
+ );
+
+ getAudioTranslationRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+
+ for (AudioTranscriptionFormat format: wrongFormats) {
+ translationOptions.setResponseFormat(format);
+ StepVerifier.create(client.getAudioTranslationText(modelId, fileName, translationOptions))
+ .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException));
+ }
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAIAsyncClient(httpClient);
+        List<AudioTranscriptionFormat> wrongFormats = Arrays.asList(
+ AudioTranscriptionFormat.TEXT,
+ AudioTranscriptionFormat.SRT,
+ AudioTranscriptionFormat.VTT
+ );
+
+ getAudioTranslationRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+
+ for (AudioTranscriptionFormat format: wrongFormats) {
+ translationOptions.setResponseFormat(format);
+ StepVerifier.create(client.getAudioTranslation(modelId, fileName, translationOptions))
+ .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException));
+ }
+ });
+ }
}
diff --git a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAISyncClientTest.java b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAISyncClientTest.java
index 43af2bf9bc43..25055df90aa7 100644
--- a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAISyncClientTest.java
+++ b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/NonAzureOpenAISyncClientTest.java
@@ -4,6 +4,11 @@
package com.azure.ai.openai;
import com.azure.ai.openai.functions.MyFunctionCallArguments;
+import com.azure.ai.openai.models.AudioTaskLabel;
+import com.azure.ai.openai.models.AudioTranscription;
+import com.azure.ai.openai.models.AudioTranscriptionFormat;
+import com.azure.ai.openai.models.AudioTranscriptionOptions;
+import com.azure.ai.openai.models.AudioTranslationOptions;
import com.azure.ai.openai.models.ChatChoice;
import com.azure.ai.openai.models.ChatCompletions;
import com.azure.ai.openai.models.ChatCompletionsOptions;
@@ -25,6 +30,7 @@
import org.junit.jupiter.params.provider.MethodSource;
import java.util.Arrays;
+import java.util.List;
import static com.azure.ai.openai.TestUtils.DISPLAY_NAME_WITH_ARGUMENTS;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -274,4 +280,265 @@ public void testCompletionContentFiltering(HttpClient httpClient, OpenAIServiceV
assertNull(completions.getChoices().get(0).getContentFilterResults());
});
}
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAISyncClient(httpClient);
+
+ getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.JSON);
+
+ AudioTranscription transcription = client.getAudioTranscription(modelId, fileName, transcriptionOptions);
+ assertAudioTranscriptionSimpleJson(transcription, BATMAN_TRANSCRIPTION);
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAISyncClient(httpClient);
+
+ getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON);
+
+ AudioTranscription transcription = client.getAudioTranscription(modelId, fileName, transcriptionOptions);
+ assertAudioTranscriptionVerboseJson(transcription, BATMAN_TRANSCRIPTION, AudioTaskLabel.TRANSCRIBE);
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAISyncClient(httpClient);
+
+ getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.TEXT);
+
+ String transcription = client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions);
+            // A text/plain request adds a line break as an artifact. Also observed for translations
+ assertEquals(BATMAN_TRANSCRIPTION + "\n", transcription);
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAISyncClient(httpClient);
+
+ getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.SRT);
+
+ String transcription = client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions);
+ // Sequence number
+ assertTrue(transcription.contains("1\n"));
+ // First sequence starts at timestamp 0
+ assertTrue(transcription.contains("00:00:00,000 --> "));
+            // Contains at least one expected word
+ assertTrue(transcription.contains("Batman"));
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAISyncClient(httpClient);
+
+ getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VTT);
+
+ String transcription = client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions);
+ // Start value according to spec
+ assertTrue(transcription.startsWith("WEBVTT\n"));
+ // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "."
+ assertTrue(transcription.contains("00:00:00.000 --> "));
+ // Contains at least one expected word
+ assertTrue(transcription.contains("Batman"));
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAISyncClient(httpClient);
+        List<AudioTranscriptionFormat> wrongFormats = Arrays.asList(
+ AudioTranscriptionFormat.JSON,
+ AudioTranscriptionFormat.VERBOSE_JSON
+ );
+
+ getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+
+ for (AudioTranscriptionFormat format: wrongFormats) {
+ transcriptionOptions.setResponseFormat(format);
+ assertThrows(IllegalArgumentException.class, () -> {
+ client.getAudioTranscriptionText(modelId, fileName, transcriptionOptions);
+ });
+ }
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAISyncClient(httpClient);
+        List<AudioTranscriptionFormat> wrongFormats = Arrays.asList(
+ AudioTranscriptionFormat.TEXT,
+ AudioTranscriptionFormat.SRT,
+ AudioTranscriptionFormat.VTT
+ );
+
+ getAudioTranscriptionRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+
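+            // The typed overload only supports JSON formats; plain-text style formats (text, srt, vtt) should fail fast client-side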
+ for (AudioTranscriptionFormat format: wrongFormats) {
+ transcriptionOptions.setResponseFormat(format);
+ assertThrows(IllegalArgumentException.class, () -> {
+ client.getAudioTranscription(modelId, fileName, transcriptionOptions);
+ });
+ }
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAISyncClient(httpClient);
+
+ getAudioTranslationRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.JSON);
+
+ AudioTranscription translation = client.getAudioTranslation(modelId, fileName, translationOptions);
+ assertAudioTranscriptionSimpleJson(translation, "It's raining today.");
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAISyncClient(httpClient);
+
+ getAudioTranslationRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON);
+
+ AudioTranscription translation = client.getAudioTranslation(modelId, fileName, translationOptions);
+ assertAudioTranscriptionVerboseJson(translation, "It's raining today.", AudioTaskLabel.TRANSLATE);
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAISyncClient(httpClient);
+
+ getAudioTranslationRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.TEXT);
+
+            String translation = client.getAudioTranslationText(modelId, fileName, translationOptions);
+            assertEquals("It's raining today.\n", translation);
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAISyncClient(httpClient);
+
+ getAudioTranslationRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.SRT);
+
+            String translation = client.getAudioTranslationText(modelId, fileName, translationOptions);
+            // Sequence number
+            assertTrue(translation.contains("1\n"));
+            // First sequence starts at timestamp 0
+            assertTrue(translation.contains("00:00:00,000 --> "));
+            // Actual translation value
+            assertTrue(translation.contains("It's raining today."));
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAISyncClient(httpClient);
+
+ getAudioTranslationRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.VTT);
+
+            String translation = client.getAudioTranslationText(modelId, fileName, translationOptions);
+            // Start value according to spec
+            assertTrue(translation.startsWith("WEBVTT\n"));
+            // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "."
+            assertTrue(translation.contains("00:00:00.000 --> "));
+            // Actual translation value
+            assertTrue(translation.contains("It's raining today."));
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAISyncClient(httpClient);
+        List<AudioTranscriptionFormat> wrongFormats = Arrays.asList(
+ AudioTranscriptionFormat.JSON,
+ AudioTranscriptionFormat.VERBOSE_JSON
+ );
+
+ getAudioTranslationRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+
+ for (AudioTranscriptionFormat format: wrongFormats) {
+ translationOptions.setResponseFormat(format);
+ assertThrows(IllegalArgumentException.class, () -> {
+ client.getAudioTranslationText(modelId, fileName, translationOptions);
+ });
+ }
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getNonAzureOpenAISyncClient(httpClient);
+        List<AudioTranscriptionFormat> wrongFormats = Arrays.asList(
+ AudioTranscriptionFormat.TEXT,
+ AudioTranscriptionFormat.SRT,
+ AudioTranscriptionFormat.VTT
+ );
+
+ getAudioTranslationRunnerForNonAzure((modelId, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+
+ for (AudioTranscriptionFormat format: wrongFormats) {
+ translationOptions.setResponseFormat(format);
+ assertThrows(IllegalArgumentException.class, () -> {
+ client.getAudioTranslation(modelId, fileName, translationOptions);
+ });
+ }
+ });
+ }
}
diff --git a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIAsyncClientTest.java b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIAsyncClientTest.java
index 0c99aa4a6fb4..44987bd6c7c4 100644
--- a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIAsyncClientTest.java
+++ b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIAsyncClientTest.java
@@ -4,6 +4,10 @@
package com.azure.ai.openai;
import com.azure.ai.openai.functions.MyFunctionCallArguments;
+import com.azure.ai.openai.models.AudioTaskLabel;
+import com.azure.ai.openai.models.AudioTranscriptionFormat;
+import com.azure.ai.openai.models.AudioTranscriptionOptions;
+import com.azure.ai.openai.models.AudioTranslationOptions;
import com.azure.ai.openai.models.AzureChatExtensionConfiguration;
import com.azure.ai.openai.models.AzureChatExtensionType;
import com.azure.ai.openai.models.AzureCognitiveSearchChatExtensionConfiguration;
@@ -31,13 +35,14 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
+import java.util.List;
import static com.azure.ai.openai.TestUtils.DISPLAY_NAME_WITH_ARGUMENTS;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class OpenAIAsyncClientTest extends OpenAIClientTestBase {
private OpenAIAsyncClient client;
@@ -294,7 +295,7 @@ public void testChatFunctionNotSuppliedByNamePreset(HttpClient httpClient, OpenA
@ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
@MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
public void testChatCompletionContentFiltering(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
- client = getOpenAIAsyncClient(httpClient, serviceVersion);
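+        // Pinned service version: content filter annotations depend on the preview API version, so the parameterized serviceVersion is not used here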
+ client = getOpenAIAsyncClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW);
getChatCompletionsContentFilterRunner((modelId, chatMessages) -> {
StepVerifier.create(client.getChatCompletions(modelId, new ChatCompletionsOptions(chatMessages)))
.assertNext(chatCompletions -> {
@@ -310,7 +311,7 @@ public void testChatCompletionContentFiltering(HttpClient httpClient, OpenAIServ
@ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
@MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
public void testChatCompletionStreamContentFiltering(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
- client = getOpenAIAsyncClient(httpClient, serviceVersion);
+ client = getOpenAIAsyncClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW);
getChatCompletionsContentFilterRunner((modelId, chatMessages) -> {
StepVerifier.create(client.getChatCompletionsStream(modelId, new ChatCompletionsOptions(chatMessages)))
.recordWith(ArrayList::new)
@@ -362,7 +363,7 @@ public void testChatCompletionStreamContentFiltering(HttpClient httpClient, Open
@ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
@MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
public void testCompletionContentFiltering(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
- client = getOpenAIAsyncClient(httpClient, serviceVersion);
+ client = getOpenAIAsyncClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW);
getCompletionsContentFilterRunner((modelId, prompt) -> {
CompletionsOptions completionsOptions = new CompletionsOptions(Arrays.asList(prompt));
// work around for this model, there seem to be some issues with Completions in gpt-turbo models
@@ -380,7 +381,7 @@ public void testCompletionContentFiltering(HttpClient httpClient, OpenAIServiceV
@ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
@MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
public void testCompletionStreamContentFiltering(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
- client = getOpenAIAsyncClient(httpClient, serviceVersion);
+ client = getOpenAIAsyncClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW);
getCompletionsContentFilterRunner((modelId, prompt) -> {
CompletionsOptions completionsOptions = new CompletionsOptions(Arrays.asList(prompt));
// work around for this model, there seem to be some issues with Completions in gpt-turbo models
@@ -427,7 +428,7 @@ public void testCompletionStreamContentFiltering(HttpClient httpClient, OpenAISe
@ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
@MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
public void testChatCompletionsBasicSearchExtension(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
- client = getOpenAIAsyncClient(httpClient, serviceVersion);
+ client = getOpenAIAsyncClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW);
getChatCompletionsAzureChatSearchRunner((deploymentName, chatCompletionsOptions) -> {
AzureCognitiveSearchChatExtensionConfiguration cognitiveSearchConfiguration =
@@ -452,7 +453,7 @@ public void testChatCompletionsBasicSearchExtension(HttpClient httpClient, OpenA
@ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
@MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
public void testChatCompletionsStreamingBasicSearchExtension(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
- client = getOpenAIAsyncClient(httpClient, serviceVersion);
+ client = getOpenAIAsyncClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW);
getChatCompletionsAzureChatSearchRunner((deploymentName, chatCompletionsOptions) -> {
AzureCognitiveSearchChatExtensionConfiguration cognitiveSearchConfiguration =
@@ -475,4 +476,281 @@ public void testChatCompletionsStreamingBasicSearchExtension(HttpClient httpClie
.verifyComplete();
});
}
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIAsyncClient(httpClient, serviceVersion);
+
+ getAudioTranscriptionRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.JSON);
+
+ StepVerifier.create(client.getAudioTranscription(deploymentName, fileName, transcriptionOptions))
+ .assertNext(transcription ->
+ assertAudioTranscriptionSimpleJson(transcription, BATMAN_TRANSCRIPTION))
+ .verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIAsyncClient(httpClient, serviceVersion);
+
+ getAudioTranscriptionRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON);
+
+ StepVerifier.create(client.getAudioTranscription(deploymentName, fileName, transcriptionOptions))
+ .assertNext(transcription ->
+ assertAudioTranscriptionVerboseJson(transcription, BATMAN_TRANSCRIPTION, AudioTaskLabel.TRANSCRIBE))
+ .verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIAsyncClient(httpClient, serviceVersion);
+
+ getAudioTranscriptionRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.TEXT);
+
+ StepVerifier.create(client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions))
+ .assertNext(transcription ->
+                    // A text/plain response carries a trailing line break as an artifact. Also observed for translations
+ assertEquals(BATMAN_TRANSCRIPTION + "\n", transcription))
+ .verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIAsyncClient(httpClient, serviceVersion);
+
+ getAudioTranscriptionRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.SRT);
+
+ StepVerifier.create(client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions))
+                .assertNext(transcription -> {
+                    // First sequence number
+                    assertTrue(transcription.contains("1\n"));
+                    // First sequence starts at timestamp 0
+                    assertTrue(transcription.contains("00:00:00,000 --> "));
+                    // Transcription contains at least one expected word
+                    assertTrue(transcription.contains("Batman"));
+                }).verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIAsyncClient(httpClient, serviceVersion);
+
+ getAudioTranscriptionRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VTT);
+
+ StepVerifier.create(client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions))
+                .assertNext(transcription -> {
+                    // Start value according to spec
+                    assertTrue(transcription.startsWith("WEBVTT\n"));
+                    // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "."
+                    assertTrue(transcription.contains("00:00:00.000 --> "));
+                    // Transcription contains at least one expected word
+                    assertTrue(transcription.contains("Batman"));
+                }).verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIAsyncClient(httpClient, serviceVersion);
+        List<AudioTranscriptionFormat> wrongFormats = Arrays.asList(
+ AudioTranscriptionFormat.JSON,
+ AudioTranscriptionFormat.VERBOSE_JSON
+ );
+
+ getAudioTranscriptionRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+
+ for (AudioTranscriptionFormat format: wrongFormats) {
+ transcriptionOptions.setResponseFormat(format);
+ StepVerifier.create(client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions))
+ .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException));
+ }
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIAsyncClient(httpClient, serviceVersion);
+        List<AudioTranscriptionFormat> wrongFormats = Arrays.asList(
+ AudioTranscriptionFormat.TEXT,
+ AudioTranscriptionFormat.SRT,
+ AudioTranscriptionFormat.VTT
+ );
+
+ getAudioTranscriptionRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+
+ for (AudioTranscriptionFormat format: wrongFormats) {
+ transcriptionOptions.setResponseFormat(format);
+ StepVerifier.create(client.getAudioTranscription(deploymentName, fileName, transcriptionOptions))
+ .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException));
+ }
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIAsyncClient(httpClient, serviceVersion);
+
+ getAudioTranslationRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.JSON);
+
+ StepVerifier.create(client.getAudioTranslation(deploymentName, fileName, translationOptions))
+ .assertNext(translation ->
+ assertAudioTranscriptionSimpleJson(translation, "It's raining today."))
+ .verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIAsyncClient(httpClient, serviceVersion);
+
+ getAudioTranslationRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON);
+
+ StepVerifier.create(client.getAudioTranslation(deploymentName, fileName, translationOptions))
+ .assertNext(translation ->
+ assertAudioTranscriptionVerboseJson(translation, "It's raining today.", AudioTaskLabel.TRANSLATE))
+ .verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIAsyncClient(httpClient, serviceVersion);
+
+ getAudioTranslationRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.TEXT);
+
+ StepVerifier.create(client.getAudioTranslationText(deploymentName, fileName, translationOptions))
+ .assertNext(translation -> {
+ assertEquals("It's raining today.\n", translation);
+ }).verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIAsyncClient(httpClient, serviceVersion);
+
+ getAudioTranslationRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.SRT);
+
+ StepVerifier.create(client.getAudioTranslationText(deploymentName, fileName, translationOptions))
+ .assertNext(translation -> {
+ // Sequence number
+ assertTrue(translation.contains("1\n"));
+ // First sequence starts at timestamp 0
+ assertTrue(translation.contains("00:00:00,000 --> "));
+ // Actual translation value
+ assertTrue(translation.contains("It's raining today."));
+ }).verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIAsyncClient(httpClient, serviceVersion);
+
+ getAudioTranslationRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.VTT);
+
+ StepVerifier.create(client.getAudioTranslationText(deploymentName, fileName, translationOptions))
+ .assertNext(translation -> {
+ // Start value according to spec
+ assertTrue(translation.startsWith("WEBVTT\n"));
+ // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "."
+ assertTrue(translation.contains("00:00:00.000 --> "));
+ // Actual translation value
+ assertTrue(translation.contains("It's raining today."));
+ }).verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIAsyncClient(httpClient, serviceVersion);
+        List<AudioTranscriptionFormat> wrongFormats = Arrays.asList(
+ AudioTranscriptionFormat.JSON,
+ AudioTranscriptionFormat.VERBOSE_JSON
+ );
+
+ getAudioTranslationRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+
+ for (AudioTranscriptionFormat format: wrongFormats) {
+ translationOptions.setResponseFormat(format);
+ StepVerifier.create(client.getAudioTranslationText(deploymentName, fileName, translationOptions))
+ .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException));
+ }
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIAsyncClient(httpClient, serviceVersion);
+        List<AudioTranscriptionFormat> wrongFormats = Arrays.asList(
+ AudioTranscriptionFormat.TEXT,
+ AudioTranscriptionFormat.SRT,
+ AudioTranscriptionFormat.VTT
+ );
+
+ getAudioTranslationRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+
+ for (AudioTranscriptionFormat format: wrongFormats) {
+ translationOptions.setResponseFormat(format);
+ StepVerifier.create(client.getAudioTranslation(deploymentName, fileName, translationOptions))
+ .verifyErrorSatisfies(error -> assertTrue(error instanceof IllegalArgumentException));
+ }
+ });
+ }
}
diff --git a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTestBase.java b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTestBase.java
index 9b8cb0014cd0..a3364ee90bdc 100644
--- a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTestBase.java
+++ b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTestBase.java
@@ -5,6 +5,8 @@
package com.azure.ai.openai;
import com.azure.ai.openai.functions.Parameters;
+import com.azure.ai.openai.models.AudioTaskLabel;
+import com.azure.ai.openai.models.AudioTranscription;
import com.azure.ai.openai.models.AzureChatExtensionsMessageContext;
import com.azure.ai.openai.models.ChatChoice;
import com.azure.ai.openai.models.ChatCompletions;
@@ -26,6 +28,8 @@
import com.azure.core.credential.AzureKeyCredential;
import com.azure.core.credential.KeyCredential;
import com.azure.core.http.HttpClient;
+import com.azure.core.http.policy.HttpLogDetailLevel;
+import com.azure.core.http.policy.HttpLogOptions;
import com.azure.core.http.rest.Response;
import com.azure.core.test.TestMode;
import com.azure.core.test.TestProxyTestBase;
@@ -35,6 +39,8 @@
import com.azure.core.util.Configuration;
import org.junit.jupiter.api.Test;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -55,7 +61,7 @@ public abstract class OpenAIClientTestBase extends TestProxyTestBase {
OpenAIClientBuilder getOpenAIClientBuilder(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
OpenAIClientBuilder builder = new OpenAIClientBuilder()
-// .httpLogOptions(new HttpLogOptions().setLogLevel(HttpLogDetailLevel.BODY_AND_HEADERS))
+ .httpLogOptions(new HttpLogOptions().setLogLevel(HttpLogDetailLevel.BODY_AND_HEADERS))
.httpClient(httpClient)
.serviceVersion(serviceVersion);
@@ -122,7 +128,6 @@ protected String getAzureCognitiveSearchKey() {
}
}
-
@Test
public abstract void testGetCompletions(HttpClient httpClient, OpenAIServiceVersion serviceVersion);
@@ -206,6 +211,22 @@ void getCompletionsContentFilterRunnerForNonAzure(BiConsumer<String, String> testRunner) {
testRunner.accept("text-davinci-002", "What is 3 times 4?");
}
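+    // Azure runners supply a deployment name ("whisper-deployment"); the *ForNonAzure variants supply an OpenAI model name ("whisper-1")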
+    void getAudioTranscriptionRunner(BiConsumer<String, String> testRunner) {
+        testRunner.accept("whisper-deployment", "batman.wav");
+    }
+
+    void getAudioTranslationRunner(BiConsumer<String, String> testRunner) {
+        testRunner.accept("whisper-deployment", "JP_it_is_rainy_today.wav");
+    }
+
+    void getAudioTranscriptionRunnerForNonAzure(BiConsumer<String, String> testRunner) {
+        testRunner.accept("whisper-1", "batman.wav");
+    }
+
+    void getAudioTranslationRunnerForNonAzure(BiConsumer<String, String> testRunner) {
+        testRunner.accept("whisper-1", "JP_it_is_rainy_today.wav");
+    }
+
private List<ChatMessage> getChatMessages() {
List<ChatMessage> chatMessages = new ArrayList<>();
chatMessages.add(new ChatMessage(ChatRole.SYSTEM, "You are a helpful assistant. You will talk like a pirate."));
@@ -229,6 +250,10 @@ private ChatCompletionsOptions getChatMessagesWithFunction() {
return chatCompletionOptions;
}
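+    // Resolves a named test resource under src/test/resources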
+ static Path openTestResourceFile(String fileName) {
+ return Paths.get("src/test/resources/" + fileName);
+ }
+
static void assertCompletions(int choicesPerPrompt, Completions actual) {
assertCompletions(choicesPerPrompt, "stop", actual);
}
@@ -413,4 +438,42 @@ static void assertChatCompletionsStreamingCognitiveSearch(Stream<ChatCompletions>
diff --git a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTest.java b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTest.java
--- a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTest.java
+++ b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/OpenAIClientTest.java
ChatCompletions chatCompletions = client.getChatCompletions(modelId, new ChatCompletionsOptions(chatMessages));
assertSafeContentFilterResults(chatCompletions.getPromptFilterResults().get(0).getContentFilterResults());
@@ -260,7 +266,7 @@ public void testChatCompletionContentFiltering(HttpClient httpClient, OpenAIServ
@ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
@MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
public void testChatCompletionStreamContentFiltering(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
- client = getOpenAIClient(httpClient, serviceVersion);
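+        // Pinned service version: content filter annotations depend on the preview API version, so the parameterized serviceVersion is not used here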
+ client = getOpenAIClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW);
getChatCompletionsContentFilterRunner((modelId, chatMessages) -> {
IterableStream<ChatCompletions> messageList = client.getChatCompletionsStream(modelId, new ChatCompletionsOptions(chatMessages));
@@ -306,7 +312,7 @@ public void testChatCompletionStreamContentFiltering(HttpClient httpClient, Open
@ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
@MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
public void testCompletionContentFiltering(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
- client = getOpenAIClient(httpClient, serviceVersion);
+ client = getOpenAIClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW);
getCompletionsContentFilterRunner((modelId, prompt) -> {
CompletionsOptions completionsOptions = new CompletionsOptions(Arrays.asList(prompt));
// work around for this model, there seem to be some issues with Completions in gpt-turbo models
@@ -358,7 +364,7 @@ public void testCompletionStreamContentFiltering(HttpClient httpClient, OpenAISe
@ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
@MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
public void testChatCompletionsBasicSearchExtension(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
- client = getOpenAIClient(httpClient, serviceVersion);
+ client = getOpenAIClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW);
getChatCompletionsAzureChatSearchRunner((deploymentName, chatCompletionsOptions) -> {
AzureCognitiveSearchChatExtensionConfiguration cognitiveSearchConfiguration =
@@ -382,7 +388,7 @@ public void testChatCompletionsBasicSearchExtension(HttpClient httpClient, OpenA
@ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
@MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
public void testChatCompletionsStreamingBasicSearchExtension(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
- client = getOpenAIClient(httpClient, serviceVersion);
+ client = getOpenAIClient(httpClient, OpenAIServiceVersion.V2023_08_01_PREVIEW);
getChatCompletionsAzureChatSearchRunner((deploymentName, chatCompletionsOptions) -> {
AzureCognitiveSearchChatExtensionConfiguration cognitiveSearchConfiguration =
@@ -402,4 +408,263 @@ public void testChatCompletionsStreamingBasicSearchExtension(HttpClient httpClie
assertChatCompletionsStreamingCognitiveSearch(resultChatCompletions.stream());
});
}
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIClient(httpClient, serviceVersion);
+
+ getAudioTranscriptionRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.JSON);
+
+ AudioTranscription transcription = client.getAudioTranscription(deploymentName, fileName, transcriptionOptions);
+ assertAudioTranscriptionSimpleJson(transcription, BATMAN_TRANSCRIPTION);
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIClient(httpClient, serviceVersion);
+
+ getAudioTranscriptionRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON);
+
+ AudioTranscription transcription = client.getAudioTranscription(deploymentName, fileName, transcriptionOptions);
+ assertAudioTranscriptionVerboseJson(transcription, BATMAN_TRANSCRIPTION, AudioTaskLabel.TRANSCRIBE);
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIClient(httpClient, serviceVersion);
+
+ getAudioTranscriptionRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.TEXT);
+
+ String transcription = client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions);
+            // A text/plain response carries a trailing line break as an artifact. Also observed for translations
+ assertEquals(BATMAN_TRANSCRIPTION + "\n", transcription);
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIClient(httpClient, serviceVersion);
+
+ getAudioTranscriptionRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.SRT);
+
+ String transcription = client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions);
+ // Contains at least one sequence
+ assertTrue(transcription.contains("1\n"));
+ // First sequence starts at timestamp 0
+ assertTrue(transcription.contains("00:00:00,000 --> "));
+ // Contains at least one expected word
+ assertTrue(transcription.contains("Batman"));
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIClient(httpClient, serviceVersion);
+
+ getAudioTranscriptionRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setResponseFormat(AudioTranscriptionFormat.VTT);
+
+ String transcription = client.getAudioTranscriptionText(deploymentName, fileName, transcriptionOptions);
+ // Start value according to spec
+ assertTrue(transcription.startsWith("WEBVTT\n"));
+ // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "."
+ assertTrue(transcription.contains("00:00:00.000 --> "));
+ // Contains at least one expected word in the transcription
+ assertTrue(transcription.contains("Batman"));
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIClient(httpClient, serviceVersion);
+        List<AudioTranscriptionFormat> wrongFormats = Arrays.asList(
+ AudioTranscriptionFormat.JSON,
+ AudioTranscriptionFormat.VERBOSE_JSON
+ );
+
+ getAudioTranscriptionRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions audioTranscriptionOptions = new AudioTranscriptionOptions(file);
+
+ for (AudioTranscriptionFormat format: wrongFormats) {
+ audioTranscriptionOptions.setResponseFormat(format);
+ assertThrows(IllegalArgumentException.class, () ->
+ client.getAudioTranscriptionText(deploymentName, fileName, audioTranscriptionOptions));
+ }
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranscriptionJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIClient(httpClient, serviceVersion);
+        List<AudioTranscriptionFormat> wrongFormats = Arrays.asList(
+ AudioTranscriptionFormat.TEXT,
+ AudioTranscriptionFormat.SRT,
+ AudioTranscriptionFormat.VTT
+ );
+
+ getAudioTranscriptionRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranscriptionOptions audioTranscriptionOptions = new AudioTranscriptionOptions(file);
+
+ for (AudioTranscriptionFormat format: wrongFormats) {
+ audioTranscriptionOptions.setResponseFormat(format);
+ assertThrows(IllegalArgumentException.class, () ->
+ client.getAudioTranscription(deploymentName, fileName, audioTranscriptionOptions));
+ }
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIClient(httpClient, serviceVersion);
+
+ getAudioTranslationRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.JSON);
+
+ AudioTranscription translation = client.getAudioTranslation(deploymentName, fileName, translationOptions);
+ assertAudioTranscriptionSimpleJson(translation, "It's raining today.");
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationVerboseJson(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIClient(httpClient, serviceVersion);
+
+ getAudioTranslationRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.VERBOSE_JSON);
+
+ AudioTranscription translation = client.getAudioTranslation(deploymentName, fileName, translationOptions);
+ assertAudioTranscriptionVerboseJson(translation, "It's raining today.", AudioTaskLabel.TRANSLATE);
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationTextPlain(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIClient(httpClient, serviceVersion);
+
+ getAudioTranslationRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.TEXT);
+
+            String translation = client.getAudioTranslationText(deploymentName, fileName, translationOptions);
+            assertEquals("It's raining today.\n", translation);
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationSrt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIClient(httpClient, serviceVersion);
+
+ getAudioTranslationRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.SRT);
+
+            String translation = client.getAudioTranslationText(deploymentName, fileName, translationOptions);
+            // Sequence number
+            assertTrue(translation.contains("1\n"));
+            // First sequence starts at timestamp 0
+            assertTrue(translation.contains("00:00:00,000 --> "));
+            // Actual translation value
+            assertTrue(translation.contains("It's raining today."));
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationVtt(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIClient(httpClient, serviceVersion);
+
+ getAudioTranslationRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setResponseFormat(AudioTranscriptionFormat.VTT);
+
+            String translation = client.getAudioTranslationText(deploymentName, fileName, translationOptions);
+            // Start value according to spec
+            assertTrue(translation.startsWith("WEBVTT\n"));
+            // First sequence starts at timestamp 0. Note: unlike SRT, the millisecond separator is a "."
+            assertTrue(translation.contains("00:00:00.000 --> "));
+            // Actual translation value
+            assertTrue(translation.contains("It's raining today."));
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationTextWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIClient(httpClient, serviceVersion);
+        List<AudioTranscriptionFormat> wrongFormats = Arrays.asList(
+ AudioTranscriptionFormat.JSON,
+ AudioTranscriptionFormat.VERBOSE_JSON
+ );
+
+ getAudioTranslationRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+
+ for (AudioTranscriptionFormat format: wrongFormats) {
+ translationOptions.setResponseFormat(format);
+ assertThrows(IllegalArgumentException.class, () -> {
+ client.getAudioTranslationText(deploymentName, fileName, translationOptions);
+ });
+ }
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.openai.TestUtils#getTestParameters")
+ public void testGetAudioTranslationJsonWrongFormats(HttpClient httpClient, OpenAIServiceVersion serviceVersion) {
+ client = getOpenAIClient(httpClient, serviceVersion);
+        List<AudioTranscriptionFormat> wrongFormats = Arrays.asList(
+ AudioTranscriptionFormat.TEXT,
+ AudioTranscriptionFormat.SRT,
+ AudioTranscriptionFormat.VTT
+ );
+
+ getAudioTranslationRunner((deploymentName, fileName) -> {
+ byte[] file = BinaryData.fromFile(openTestResourceFile(fileName)).toBytes();
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+
+ for (AudioTranscriptionFormat format: wrongFormats) {
+ translationOptions.setResponseFormat(format);
+ assertThrows(IllegalArgumentException.class, () -> {
+ client.getAudioTranslation(deploymentName, fileName, translationOptions);
+ });
+ }
+ });
+ }
}
diff --git a/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/implementation/MultipartDataHelperTest.java b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/implementation/MultipartDataHelperTest.java
new file mode 100644
index 000000000000..be4fdadbe97e
--- /dev/null
+++ b/sdk/openai/azure-ai-openai/src/test/java/com/azure/ai/openai/implementation/MultipartDataHelperTest.java
@@ -0,0 +1,132 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.openai.implementation;
+
+import com.azure.ai.openai.models.AudioTranscriptionFormat;
+import com.azure.ai.openai.models.AudioTranscriptionOptions;
+import com.azure.ai.openai.models.AudioTranslationOptions;
+import com.azure.ai.openai.models.EmbeddingsOptions;
+import org.junit.jupiter.api.Test;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+/**
+ * Unit tests for {@link MultipartDataHelper}
+ */
+public class MultipartDataHelperTest {
+
+ private static final String TEST_BOUNDARY = "test-boundary";
+
+ @Test
+ public void serializeAudioTranslationOptionsAllFields() {
+ MultipartDataHelper helper = new MultipartDataHelper(TEST_BOUNDARY);
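+        // ASCII encoding of the sentence "I should have thought of a good easter egg"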
+ byte[] file = new byte[] {73, 32, 115, 104, 111, 117, 108, 100, 32, 104, 97, 118, 101, 32, 116, 104, 111, 117,
+ 103, 104, 116, 32, 111, 102, 32, 97, 32, 103, 111, 111, 100, 32, 101, 97, 115, 116, 101, 114, 32, 101,
+ 103, 103};
+ String fileName = "file_name.wav";
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ translationOptions.setModel("model_name")
+ .setPrompt("prompt text")
+ .setResponseFormat(AudioTranscriptionFormat.TEXT)
+ .setTemperature(0.1);
+ MultipartDataSerializationResult actual = helper.serializeRequest(translationOptions, fileName);
+
+ String expected = multipartFileSegment(fileName, file)
+ + fieldFormData("response_format", "text")
+ + fieldFormData("model", "model_name")
+ + fieldFormData("prompt", "prompt text")
+ + fieldFormData("temperature", "0.1")
+ + closingMarker();
+
+ assertEquals(expected, actual.getData().toString());
+ assertEquals(expected.getBytes(StandardCharsets.US_ASCII).length, actual.getDataLength());
+ }
+
+ @Test
+ public void serializeAudioTranscriptionOptionsAllFields() {
+ MultipartDataHelper helper = new MultipartDataHelper(TEST_BOUNDARY);
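+        // ASCII encoding of the sentence "I should have thought of a good easter egg"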
+ byte[] file = new byte[] {73, 32, 115, 104, 111, 117, 108, 100, 32, 104, 97, 118, 101, 32, 116, 104, 111, 117,
+ 103, 104, 116, 32, 111, 102, 32, 97, 32, 103, 111, 111, 100, 32, 101, 97, 115, 116, 101, 114, 32, 101,
+ 103, 103};
+ String fileName = "file_name.wav";
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ transcriptionOptions.setModel("model_name")
+ .setPrompt("prompt text")
+ .setResponseFormat(AudioTranscriptionFormat.TEXT)
+ .setLanguage("en")
+ .setTemperature(0.1);
+ MultipartDataSerializationResult actual = helper.serializeRequest(transcriptionOptions, fileName);
+
+ String expected = multipartFileSegment(fileName, file)
+ + fieldFormData("response_format", "text")
+ + fieldFormData("model", "model_name")
+ + fieldFormData("prompt", "prompt text")
+ + fieldFormData("temperature", "0.1")
+ + fieldFormData("language", "en")
+ + closingMarker();
+
+ assertEquals(expected, actual.getData().toString());
+ assertEquals(expected.getBytes(StandardCharsets.US_ASCII).length, actual.getDataLength());
+ }
+
+ @Test
+ public void serializeAudioTranslationOptionsNoFields() {
+ MultipartDataHelper helper = new MultipartDataHelper(TEST_BOUNDARY);
+ byte[] file = new byte[] {};
+ String fileName = "file_name.wav";
+ AudioTranslationOptions translationOptions = new AudioTranslationOptions(file);
+ MultipartDataSerializationResult actual = helper.serializeRequest(translationOptions, fileName);
+
+ String expected = multipartFileSegment(fileName, file)
+ + closingMarker();
+
+ assertEquals(expected, actual.getData().toString());
+ assertEquals(expected.getBytes(StandardCharsets.US_ASCII).length, actual.getDataLength());
+ }
+
+ @Test
+ public void serializeAudioTranscriptionOptionsNoFields() {
+ MultipartDataHelper helper = new MultipartDataHelper(TEST_BOUNDARY);
+ byte[] file = new byte[] {};
+ String fileName = "file_name.wav";
+ AudioTranscriptionOptions transcriptionOptions = new AudioTranscriptionOptions(file);
+ MultipartDataSerializationResult actual = helper.serializeRequest(transcriptionOptions, fileName);
+
+ String expected = multipartFileSegment(fileName, file)
+ + closingMarker();
+
+ assertEquals(expected, actual.getData().toString());
+ assertEquals(expected.getBytes(StandardCharsets.US_ASCII).length, actual.getDataLength());
+ }
+
+ @Test
+ public void serializeUnsupportedType() {
+ assertThrows(IllegalArgumentException.class, () -> {
+ MultipartDataHelper helper = new MultipartDataHelper(TEST_BOUNDARY);
+ EmbeddingsOptions embeddingsOptions = new EmbeddingsOptions(new ArrayList<>());
+ helper.serializeRequest(embeddingsOptions, "path/to/file");
+ });
+ }
+
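+    // Builds one form-data field segment: boundary line, Content-Disposition header, blank line, then the field value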
+ private static String fieldFormData(String fieldName, String fieldValue) {
+ return "\r\n--test-boundary"
+ + "\r\nContent-Disposition: form-data; name=\"" + fieldName + "\"\r\n\r\n"
+ + fieldValue;
+ }
+
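+    // Builds the leading file part: boundary, Content-Disposition with filename, Content-Type header, then the raw bytes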
+ private static String multipartFileSegment(String fileName, byte[] fileBytes) {
+ return "--test-boundary\r\n"
+ + "Content-Disposition: form-data; name=\"file\"; filename=\"" + fileName + "\"\r\n"
+ + "Content-Type: application/octet-stream\r\n\r\n"
+ + new String(fileBytes, StandardCharsets.US_ASCII);
+ }
+
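+    // Closing boundary marker that terminates the multipart body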
+ private static String closingMarker() {
+ return "\r\n--test-boundary--";
+ }
+}
diff --git a/sdk/openai/azure-ai-openai/src/test/resources/JP_it_is_rainy_today.wav b/sdk/openai/azure-ai-openai/src/test/resources/JP_it_is_rainy_today.wav
new file mode 100644
index 000000000000..5970c85ec1cd
Binary files /dev/null and b/sdk/openai/azure-ai-openai/src/test/resources/JP_it_is_rainy_today.wav differ
diff --git a/sdk/openai/azure-ai-openai/src/test/resources/batman.wav b/sdk/openai/azure-ai-openai/src/test/resources/batman.wav
new file mode 100644
index 000000000000..4c0b7248a39c
Binary files /dev/null and b/sdk/openai/azure-ai-openai/src/test/resources/batman.wav differ
diff --git a/sdk/openai/azure-ai-openai/tsp-location.yaml b/sdk/openai/azure-ai-openai/tsp-location.yaml
index 368074679599..bc4052dd97e4 100644
--- a/sdk/openai/azure-ai-openai/tsp-location.yaml
+++ b/sdk/openai/azure-ai-openai/tsp-location.yaml
@@ -1,5 +1,5 @@
directory: specification/cognitiveservices/OpenAI.Inference
additionalDirectories:
- specification/cognitiveservices/OpenAI.Authoring
-commit: b646a42aa3b7a0ce488d05f1724827ea41d12cf1
+commit: dd2d1e8957ac6654272137e8d5874eacafd80a5f
repo: Azure/azure-rest-api-specs