Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions sdk/openai/azure-ai-openai/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

### Features Added

- Added a new overload `getChatCompletionsStreamWithResponse` that takes `RequestOptions` to provide the flexibility to
modify the HTTP request.

### Breaking Changes

### Bugs Fixed
Expand Down
32 changes: 14 additions & 18 deletions sdk/openai/azure-ai-openai/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -234,24 +234,20 @@ chatMessages.add(new ChatRequestUserMessage("Can you help me?"));
chatMessages.add(new ChatRequestAssistantMessage("Of course, me hearty! What can I do for ye?"));
chatMessages.add(new ChatRequestUserMessage("What's the best way to train a parrot?"));

IterableStream<ChatCompletions> chatCompletionsStream = client.getChatCompletionsStream("{deploymentOrModelName}",
new ChatCompletionsOptions(chatMessages));

chatCompletionsStream
.stream()
// Remove .skip(1) when using Non-Azure OpenAI API
// Note: the first chat completions can be ignored when using Azure OpenAI service which is a known service bug.
// TODO: remove .skip(1) when service fix the issue.
.skip(1)
.forEach(chatCompletions -> {
ChatResponseMessage delta = chatCompletions.getChoices().get(0).getDelta();
if (delta.getRole() != null) {
System.out.println("Role = " + delta.getRole());
}
if (delta.getContent() != null) {
System.out.print(delta.getContent());
}
});
client.getChatCompletionsStream("{deploymentOrModelName}", new ChatCompletionsOptions(chatMessages))
.forEach(chatCompletions -> {
if (CoreUtils.isNullOrEmpty(chatCompletions.getChoices())) {
return;
}
ChatResponseMessage delta = chatCompletions.getChoices().get(0).getDelta();
if (delta.getRole() != null) {
System.out.println("Role = " + delta.getRole());
}
if (delta.getContent() != null) {
String content = delta.getContent();
System.out.print(content);
}
});
```

To compute tokens in streaming chat completions, see sample [Streaming Chat Completions][sample_get_chat_completions_streaming].
Expand Down
2 changes: 1 addition & 1 deletion sdk/openai/azure-ai-openai/assets.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
"AssetsRepo": "Azure/azure-sdk-assets",
"AssetsRepoPrefixPath": "java",
"TagPrefix": "java/openai/azure-ai-openai",
"Tag": "java/openai/azure-ai-openai_915389e465"
"Tag": "java/openai/azure-ai-openai_76031b0cb0"
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import com.azure.core.util.logging.ClientLogger;
import com.fasterxml.jackson.core.JsonProcessingException;
import java.nio.ByteBuffer;
import java.util.concurrent.atomic.AtomicReference;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;

Expand Down Expand Up @@ -556,6 +557,57 @@ public Mono<Response<ChatCompletions>> getChatCompletionsWithResponse(String dep
.map(response -> new SimpleResponse<>(response, response.getValue().toObject(ChatCompletions.class)));
}

/**
* Gets chat completions for the provided chat messages in streaming mode. Chat completions support a wide variety
* of tasks and generate text that continues from or "completes" provided prompt data.
*
* <p>
* <strong>Code Samples</strong>
* </p>
* <!-- src_embed
* com.azure.ai.openai.OpenAIAsyncClient.getChatCompletionsStream#String-ChatCompletionsOptionsMaxOverload -->
* <pre>
* openAIAsyncClient.getChatCompletionsStreamWithResponse&#40;deploymentOrModelId, new ChatCompletionsOptions&#40;chatMessages&#41;,
* new RequestOptions&#40;&#41;.setHeader&#40;&quot;my-header&quot;, &quot;my-header-value&quot;&#41;&#41;
* .subscribe&#40;
* response -&gt; System.out.print&#40;response.getValue&#40;&#41;.getId&#40;&#41;&#41;,
* error -&gt; System.err.println&#40;&quot;There was an error getting chat completions.&quot; + error&#41;,
* &#40;&#41; -&gt; System.out.println&#40;&quot;Completed called getChatCompletionsStreamWithResponse.&quot;&#41;&#41;;
* </pre>
* <!-- end com.azure.ai.openai.OpenAIAsyncClient.getChatCompletionsStream#String-ChatCompletionsOptionsMaxOverload
* -->
*
* @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name
* (when using non-Azure OpenAI) to use for this request.
* @param chatCompletionsOptions The configuration information for a chat completions request. Completions support a
* wide variety of tasks and generate text that continues from or "completes" provided prompt data.
* @param requestOptions The options to configure the HTTP request before HTTP client sends it.
* @throws IllegalArgumentException thrown if parameters fail the validation.
* @throws HttpResponseException thrown if the request is rejected by server.
* @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
* @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
* @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
* @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent.
* @return chat completions stream for the provided chat messages. Completions support a wide variety of tasks and
* generate text that continues from or "completes" provided prompt data.
*/
@ServiceMethod(returns = ReturnType.COLLECTION)
public Flux<Response<ChatCompletions>> getChatCompletionsStreamWithResponse(String deploymentOrModelName,
ChatCompletionsOptions chatCompletionsOptions, RequestOptions requestOptions) {
// Force streaming on: the service then replies with server-sent events instead of a single JSON body.
// NOTE(review): this mutates the caller-supplied options object — confirm that is acceptable to callers.
chatCompletionsOptions.setStream(true);
Mono<Response<BinaryData>> chatCompletionsWithResponse = getChatCompletionsWithResponse(deploymentOrModelName,
BinaryData.fromObject(chatCompletionsOptions), requestOptions);
// Captures the raw HTTP response so each emitted event can be paired with the response metadata below.
AtomicReference<Response<BinaryData>> responseCopy = new AtomicReference<>();
// flatMapMany runs when the response arrives, so responseCopy is populated before any event is mapped.
Flux<ByteBuffer> responseStream = chatCompletionsWithResponse.flatMapMany(response -> {
responseCopy.set(response);
return response.getValue().toFluxByteBuffer();
});
// Parses the raw byte stream of server-sent events into individual ChatCompletions objects.
OpenAIServerSentEvents<ChatCompletions> chatCompletionsStream
= new OpenAIServerSentEvents<>(responseStream, ChatCompletions.class);
// Wrap every streamed completion with the HTTP response captured above.
// NOTE(review): responseCopy is shared state; re-subscribing this Flux would overwrite it with a new
// response — confirm the returned Flux is intended for single subscription.
return chatCompletionsStream.getEvents()
.map(chatCompletions -> new SimpleResponse<>(responseCopy.get(), chatCompletions));
}

/**
* Return the embeddings for a given prompt.
*
Expand Down Expand Up @@ -646,21 +698,10 @@ public Mono<Completions> getCompletions(String deploymentOrModelName, String pro
* <pre>
* openAIAsyncClient
* .getChatCompletionsStream&#40;deploymentOrModelId, new ChatCompletionsOptions&#40;chatMessages&#41;&#41;
* .toStream&#40;&#41;
* &#47;&#47; Remove .skip&#40;1&#41; when using Non-Azure OpenAI API
* &#47;&#47; Note: the first chat completions can be ignored when using Azure OpenAI service which is a known service bug.
* &#47;&#47; TODO: remove .skip&#40;1&#41; after service fixes the issue.
* .skip&#40;1&#41;
* .forEach&#40;chatCompletions -&gt; &#123;
* ChatResponseMessage delta = chatCompletions.getChoices&#40;&#41;.get&#40;0&#41;.getDelta&#40;&#41;;
* if &#40;delta.getRole&#40;&#41; != null&#41; &#123;
* System.out.println&#40;&quot;Role = &quot; + delta.getRole&#40;&#41;&#41;;
* &#125;
* if &#40;delta.getContent&#40;&#41; != null&#41; &#123;
* String content = delta.getContent&#40;&#41;;
* System.out.print&#40;content&#41;;
* &#125;
* &#125;&#41;;
* .subscribe&#40;
* chatCompletions -&gt; System.out.print&#40;chatCompletions.getId&#40;&#41;&#41;,
* error -&gt; System.err.println&#40;&quot;There was an error getting chat completions.&quot; + error&#41;,
* &#40;&#41; -&gt; System.out.println&#40;&quot;Completed called getChatCompletionsStream.&quot;&#41;&#41;;
* </pre>
* <!-- end com.azure.ai.openai.OpenAIAsyncClient.getChatCompletionsStream#String-ChatCompletionsOptions -->
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -689,12 +689,10 @@ public ChatCompletions getChatCompletions(String deploymentOrModelName,
* <!-- src_embed com.azure.ai.openai.OpenAIClient.getChatCompletionsStream#String-ChatCompletionsOptions -->
* <pre>
* openAIClient.getChatCompletionsStream&#40;deploymentOrModelId, new ChatCompletionsOptions&#40;chatMessages&#41;&#41;
* .stream&#40;&#41;
* &#47;&#47; Remove .skip&#40;1&#41; when using Non-Azure OpenAI API
* &#47;&#47; Note: the first chat completions can be ignored when using Azure OpenAI service which is a known service bug.
* &#47;&#47; TODO: remove .skip&#40;1&#41; after service fixes the issue.
* .skip&#40;1&#41;
* .forEach&#40;chatCompletions -&gt; &#123;
* if &#40;CoreUtils.isNullOrEmpty&#40;chatCompletions.getChoices&#40;&#41;&#41;&#41; &#123;
* return;
* &#125;
* ChatResponseMessage delta = chatCompletions.getChoices&#40;&#41;.get&#40;0&#41;.getDelta&#40;&#41;;
* if &#40;delta.getRole&#40;&#41; != null&#41; &#123;
* System.out.println&#40;&quot;Role = &quot; + delta.getRole&#40;&#41;&#41;;
Expand Down Expand Up @@ -732,6 +730,60 @@ public IterableStream<ChatCompletions> getChatCompletionsStream(String deploymen
return new IterableStream<>(chatCompletionsStream.getEvents());
}

/**
* Gets chat completions for the provided chat messages in streaming mode. Chat completions support a wide variety
* of tasks and generate text that continues from or "completes" provided prompt data.
* <p>
* <strong>Code Samples</strong>
* </p>
* <!-- src_embed com.azure.ai.openai.OpenAIClient.getChatCompletionsStream#String-ChatCompletionsOptionsMaxOverload
* -->
* <pre>
* openAIClient.getChatCompletionsStreamWithResponse&#40;deploymentOrModelId, new ChatCompletionsOptions&#40;chatMessages&#41;,
* new RequestOptions&#40;&#41;.setHeader&#40;&quot;my-header&quot;, &quot;my-header-value&quot;&#41;&#41;
* .getValue&#40;&#41;
* .forEach&#40;chatCompletions -&gt; &#123;
* if &#40;CoreUtils.isNullOrEmpty&#40;chatCompletions.getChoices&#40;&#41;&#41;&#41; &#123;
* return;
* &#125;
* ChatResponseMessage delta = chatCompletions.getChoices&#40;&#41;.get&#40;0&#41;.getDelta&#40;&#41;;
* if &#40;delta.getRole&#40;&#41; != null&#41; &#123;
* System.out.println&#40;&quot;Role = &quot; + delta.getRole&#40;&#41;&#41;;
* &#125;
* if &#40;delta.getContent&#40;&#41; != null&#41; &#123;
* String content = delta.getContent&#40;&#41;;
* System.out.print&#40;content&#41;;
* &#125;
* &#125;&#41;;
* </pre>
* <!-- end com.azure.ai.openai.OpenAIClient.getChatCompletionsStream#String-ChatCompletionsOptionsMaxOverload -->
*
* @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name
* (when using non-Azure OpenAI) to use for this request.
* @param chatCompletionsOptions The configuration information for a chat completions request. Completions support a
* wide variety of tasks and generate text that continues from or "completes" provided prompt data.
* @param requestOptions The options to configure the HTTP request before HTTP client sends it.
* @throws IllegalArgumentException thrown if parameters fail the validation.
* @throws HttpResponseException thrown if the request is rejected by server.
* @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
* @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
* @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
* @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent.
* @return chat completions stream for the provided chat messages. Completions support a wide variety of tasks and
* generate text that continues from or "completes" provided prompt data.
*/
@ServiceMethod(returns = ReturnType.COLLECTION)
public Response<IterableStream<ChatCompletions>> getChatCompletionsStreamWithResponse(String deploymentOrModelName,
ChatCompletionsOptions chatCompletionsOptions, RequestOptions requestOptions) {
// Streaming must be enabled on the options so the service responds with server-sent events.
chatCompletionsOptions.setStream(true);
Response<BinaryData> rawResponse = getChatCompletionsWithResponse(deploymentOrModelName,
BinaryData.fromObject(chatCompletionsOptions), requestOptions);
// Turn the raw byte stream of server-sent events into individual ChatCompletions objects.
OpenAIServerSentEvents<ChatCompletions> eventParser
= new OpenAIServerSentEvents<>(rawResponse.getValue().toFluxByteBuffer(), ChatCompletions.class);
// Pair the iterable event stream with the original HTTP response metadata.
return new SimpleResponse<>(rawResponse, new IterableStream<>(eventParser.getEvents()));
}

/**
* Gets transcribed text and associated metadata from provided spoken audio file data. Audio will be transcribed in
* the written language corresponding to the language it was spoken in.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import com.azure.ai.openai.models.ChatResponseMessage;
import com.azure.core.credential.AzureKeyCredential;
import com.azure.core.util.Configuration;
import com.azure.core.util.CoreUtils;
import com.azure.core.util.IterableStream;
import com.knuddels.jtokkit.Encodings;
import com.knuddels.jtokkit.api.Encoding;
Expand Down Expand Up @@ -73,11 +74,11 @@ public static void main(String[] args) {
// }
chatCompletionsStream
.stream()
// Remove .skip(1) when using Non-Azure OpenAI API
// Note: the first chat completions can be ignored when using Azure OpenAI service which is a known service bug.
// TODO: remove .skip(1) after service fixes the issue.
.skip(1)
.forEach(chatCompletions -> {
if (CoreUtils.isNullOrEmpty(chatCompletions.getChoices())) {
return;
}

ChatResponseMessage delta = chatCompletions.getChoices().get(0).getDelta();

if (delta.getRole() != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@
import com.azure.ai.openai.models.ChatRequestMessage;
import com.azure.ai.openai.models.ChatRequestSystemMessage;
import com.azure.ai.openai.models.ChatRequestUserMessage;
import com.azure.ai.openai.models.ChatResponseMessage;
import com.azure.core.credential.AzureKeyCredential;
import com.azure.core.http.rest.RequestOptions;
import com.azure.core.util.Configuration;
import org.junit.jupiter.api.Test;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;

/**
* Code snippets for {@link OpenAIAsyncClient}
Expand All @@ -29,32 +30,35 @@ public class OpenAIAsyncClientJavaDocCodeSnippets {
* Code snippets for {@link OpenAIClient#getChatCompletionsStream(String, ChatCompletionsOptions)}
*/
@Test
public void getChatCompletionsStream() {
String deploymentOrModelId = "gpt-4-1106-preview";
public void getChatCompletionsStream() throws InterruptedException {
String deploymentOrModelId = Configuration.getGlobalConfiguration().get("OPENAI_DEPLOYMENT_OR_MODEL_ID");
List<ChatRequestMessage> chatMessages = new ArrayList<>();
chatMessages.add(new ChatRequestSystemMessage("You are a helpful assistant. You will talk like a pirate."));
chatMessages.add(new ChatRequestUserMessage("Can you help me?"));
chatMessages.add(new ChatRequestAssistantMessage("Of course, me hearty! What can I do for ye?"));
chatMessages.add(new ChatRequestUserMessage("What's the best way to train a parrot?"));

// BEGIN: com.azure.ai.openai.OpenAIAsyncClient.getChatCompletionsStream#String-ChatCompletionsOptions
openAIAsyncClient
.getChatCompletionsStream(deploymentOrModelId, new ChatCompletionsOptions(chatMessages))
.toStream()
// Remove .skip(1) when using Non-Azure OpenAI API
// Note: the first chat completions can be ignored when using Azure OpenAI service which is a known service bug.
// TODO: remove .skip(1) after service fixes the issue.
.skip(1)
.forEach(chatCompletions -> {
ChatResponseMessage delta = chatCompletions.getChoices().get(0).getDelta();
if (delta.getRole() != null) {
System.out.println("Role = " + delta.getRole());
}
if (delta.getContent() != null) {
String content = delta.getContent();
System.out.print(content);
}
});
.subscribe(
chatCompletions -> System.out.print(chatCompletions.getId()),
error -> System.err.println("There was an error getting chat completions." + error),
() -> System.out.println("Completed called getChatCompletionsStream."));
// END: com.azure.ai.openai.OpenAIAsyncClient.getChatCompletionsStream#String-ChatCompletionsOptions

// With Response Code Snippet

// BEGIN: com.azure.ai.openai.OpenAIAsyncClient.getChatCompletionsStream#String-ChatCompletionsOptionsMaxOverload
openAIAsyncClient.getChatCompletionsStreamWithResponse(deploymentOrModelId, new ChatCompletionsOptions(chatMessages),
new RequestOptions().setHeader("my-header", "my-header-value"))
.subscribe(
response -> System.out.print(response.getValue().getId()),
error -> System.err.println("There was an error getting chat completions." + error),
() -> System.out.println("Completed called getChatCompletionsStreamWithResponse."));
// END: com.azure.ai.openai.OpenAIAsyncClient.getChatCompletionsStream#String-ChatCompletionsOptionsMaxOverload

TimeUnit.SECONDS.sleep(10);
}

private OpenAIAsyncClient getOpenAIAsyncClient() {
Expand Down
Loading