Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions sdk/openai/azure-ai-openai/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

### Features Added

- Added a new overload `getChatCompletionsStreamWithResponse` that takes `RequestOptions` to provide the flexibility to
modify the HTTP request.

### Breaking Changes

### Bugs Fixed
Expand Down
32 changes: 14 additions & 18 deletions sdk/openai/azure-ai-openai/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -234,24 +234,20 @@ chatMessages.add(new ChatRequestUserMessage("Can you help me?"));
chatMessages.add(new ChatRequestAssistantMessage("Of course, me hearty! What can I do for ye?"));
chatMessages.add(new ChatRequestUserMessage("What's the best way to train a parrot?"));

IterableStream<ChatCompletions> chatCompletionsStream = client.getChatCompletionsStream("{deploymentOrModelName}",
new ChatCompletionsOptions(chatMessages));

chatCompletionsStream
.stream()
// Remove .skip(1) when using Non-Azure OpenAI API
// Note: the first chat completions can be ignored when using Azure OpenAI service which is a known service bug.
// TODO: remove .skip(1) when service fix the issue.
.skip(1)
.forEach(chatCompletions -> {
ChatResponseMessage delta = chatCompletions.getChoices().get(0).getDelta();
if (delta.getRole() != null) {
System.out.println("Role = " + delta.getRole());
}
if (delta.getContent() != null) {
System.out.print(delta.getContent());
}
});
client.getChatCompletionsStream("{deploymentOrModelName}", new ChatCompletionsOptions(chatMessages))
.forEach(chatCompletions -> {
if (CoreUtils.isNullOrEmpty(chatCompletions.getChoices())) {
return;
}
ChatResponseMessage delta = chatCompletions.getChoices().get(0).getDelta();
if (delta.getRole() != null) {
System.out.println("Role = " + delta.getRole());
}
if (delta.getContent() != null) {
String content = delta.getContent();
System.out.print(content);
}
});
```

To compute tokens in streaming chat completions, see sample [Streaming Chat Completions][sample_get_chat_completions_streaming].
Expand Down
2 changes: 1 addition & 1 deletion sdk/openai/azure-ai-openai/assets.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
"AssetsRepo": "Azure/azure-sdk-assets",
"AssetsRepoPrefixPath": "java",
"TagPrefix": "java/openai/azure-ai-openai",
"Tag": "java/openai/azure-ai-openai_915389e465"
"Tag": "java/openai/azure-ai-openai_76031b0cb0"
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import com.azure.core.util.logging.ClientLogger;
import com.fasterxml.jackson.core.JsonProcessingException;
import java.nio.ByteBuffer;
import java.util.concurrent.atomic.AtomicReference;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;

Expand Down Expand Up @@ -556,6 +557,57 @@ public Mono<Response<ChatCompletions>> getChatCompletionsWithResponse(String dep
.map(response -> new SimpleResponse<>(response, response.getValue().toObject(ChatCompletions.class)));
}

/**
* Gets chat completions for the provided chat messages in streaming mode. Chat completions support a wide variety
* of tasks and generate text that continues from or "completes" provided prompt data.
*
* <p>
* <strong>Code Samples</strong>
* </p>
* <!-- src_embed
* com.azure.ai.openai.OpenAIAsyncClient.getChatCompletionsStream#String-ChatCompletionsOptionsMaxOverload -->
* <pre>
* openAIAsyncClient.getChatCompletionsStreamWithResponse&#40;deploymentOrModelId, new ChatCompletionsOptions&#40;chatMessages&#41;,
* new RequestOptions&#40;&#41;.setHeader&#40;&quot;my-header&quot;, &quot;my-header-value&quot;&#41;&#41;
* .subscribe&#40;
* response -&gt; System.out.print&#40;response.getValue&#40;&#41;.getId&#40;&#41;&#41;,
* error -&gt; System.err.println&#40;&quot;There was an error getting chat completions.&quot; + error&#41;,
* &#40;&#41; -&gt; System.out.println&#40;&quot;Completed called getChatCompletionsStreamWithResponse.&quot;&#41;&#41;;
* </pre>
* <!-- end com.azure.ai.openai.OpenAIAsyncClient.getChatCompletionsStream#String-ChatCompletionsOptionsMaxOverload
* -->
*
* @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name
* (when using non-Azure OpenAI) to use for this request.
* @param chatCompletionsOptions The configuration information for a chat completions request. Completions support a
* wide variety of tasks and generate text that continues from or "completes" provided prompt data.
* @param requestOptions The options to configure the HTTP request before HTTP client sends it.
* @throws IllegalArgumentException thrown if parameters fail the validation.
* @throws HttpResponseException thrown if the request is rejected by server.
* @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
* @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
* @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
* @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent.
* @return chat completions stream for the provided chat messages. Completions support a wide variety of tasks and
* generate text that continues from or "completes" provided prompt data.
*/
@ServiceMethod(returns = ReturnType.COLLECTION)
public Flux<Response<ChatCompletions>> getChatCompletionsStreamWithResponse(String deploymentOrModelName,
ChatCompletionsOptions chatCompletionsOptions, RequestOptions requestOptions) {
// Force streaming on: the service then replies with server-sent events instead of a single JSON body.
// NOTE(review): this mutates the caller-supplied options object — confirm that is acceptable to callers.
chatCompletionsOptions.setStream(true);
Mono<Response<BinaryData>> chatCompletionsWithResponse = getChatCompletionsWithResponse(deploymentOrModelName,
BinaryData.fromObject(chatCompletionsOptions), requestOptions);
// Captures the raw HTTP response so each emitted event can be paired with the response metadata below.
AtomicReference<Response<BinaryData>> responseCopy = new AtomicReference<>();
// flatMapMany runs when the response arrives, so responseCopy is populated before any event is mapped.
Flux<ByteBuffer> responseStream = chatCompletionsWithResponse.flatMapMany(response -> {
responseCopy.set(response);
return response.getValue().toFluxByteBuffer();
});
// Parses the raw byte stream of server-sent events into individual ChatCompletions objects.
OpenAIServerSentEvents<ChatCompletions> chatCompletionsStream
= new OpenAIServerSentEvents<>(responseStream, ChatCompletions.class);
// Wrap every streamed completion with the HTTP response captured above.
// NOTE(review): responseCopy is shared state; re-subscribing this Flux would overwrite it with a new
// response — confirm the returned Flux is intended for single subscription.
return chatCompletionsStream.getEvents()
.map(chatCompletions -> new SimpleResponse<>(responseCopy.get(), chatCompletions));
}

/**
* Return the embeddings for a given prompt.
*
Expand Down Expand Up @@ -646,21 +698,10 @@ public Mono<Completions> getCompletions(String deploymentOrModelName, String pro
* <pre>
* openAIAsyncClient
* .getChatCompletionsStream&#40;deploymentOrModelId, new ChatCompletionsOptions&#40;chatMessages&#41;&#41;
* .toStream&#40;&#41;
* &#47;&#47; Remove .skip&#40;1&#41; when using Non-Azure OpenAI API
* &#47;&#47; Note: the first chat completions can be ignored when using Azure OpenAI service which is a known service bug.
* &#47;&#47; TODO: remove .skip&#40;1&#41; after service fixes the issue.
* .skip&#40;1&#41;
* .forEach&#40;chatCompletions -&gt; &#123;
* ChatResponseMessage delta = chatCompletions.getChoices&#40;&#41;.get&#40;0&#41;.getDelta&#40;&#41;;
* if &#40;delta.getRole&#40;&#41; != null&#41; &#123;
* System.out.println&#40;&quot;Role = &quot; + delta.getRole&#40;&#41;&#41;;
* &#125;
* if &#40;delta.getContent&#40;&#41; != null&#41; &#123;
* String content = delta.getContent&#40;&#41;;
* System.out.print&#40;content&#41;;
* &#125;
* &#125;&#41;;
* .subscribe&#40;
* chatCompletions -&gt; System.out.print&#40;chatCompletions.getId&#40;&#41;&#41;,
* error -&gt; System.err.println&#40;&quot;There was an error getting chat completions.&quot; + error&#41;,
* &#40;&#41; -&gt; System.out.println&#40;&quot;Completed called getChatCompletionsStream.&quot;&#41;&#41;;
* </pre>
* <!-- end com.azure.ai.openai.OpenAIAsyncClient.getChatCompletionsStream#String-ChatCompletionsOptions -->
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -689,12 +689,10 @@ public ChatCompletions getChatCompletions(String deploymentOrModelName,
* <!-- src_embed com.azure.ai.openai.OpenAIClient.getChatCompletionsStream#String-ChatCompletionsOptions -->
* <pre>
* openAIClient.getChatCompletionsStream&#40;deploymentOrModelId, new ChatCompletionsOptions&#40;chatMessages&#41;&#41;
* .stream&#40;&#41;
* &#47;&#47; Remove .skip&#40;1&#41; when using Non-Azure OpenAI API
* &#47;&#47; Note: the first chat completions can be ignored when using Azure OpenAI service which is a known service bug.
* &#47;&#47; TODO: remove .skip&#40;1&#41; after service fixes the issue.
* .skip&#40;1&#41;
* .forEach&#40;chatCompletions -&gt; &#123;
* if &#40;CoreUtils.isNullOrEmpty&#40;chatCompletions.getChoices&#40;&#41;&#41;&#41; &#123;
* return;
* &#125;
* ChatResponseMessage delta = chatCompletions.getChoices&#40;&#41;.get&#40;0&#41;.getDelta&#40;&#41;;
* if &#40;delta.getRole&#40;&#41; != null&#41; &#123;
* System.out.println&#40;&quot;Role = &quot; + delta.getRole&#40;&#41;&#41;;
Expand Down Expand Up @@ -732,6 +730,60 @@ public IterableStream<ChatCompletions> getChatCompletionsStream(String deploymen
return new IterableStream<>(chatCompletionsStream.getEvents());
}

/**
* Gets chat completions for the provided chat messages in streaming mode. Chat completions support a wide variety
* of tasks and generate text that continues from or "completes" provided prompt data.
* <p>
* <strong>Code Samples</strong>
* </p>
* <!-- src_embed com.azure.ai.openai.OpenAIClient.getChatCompletionsStream#String-ChatCompletionsOptionsMaxOverload
* -->
* <pre>
* openAIClient.getChatCompletionsStreamWithResponse&#40;deploymentOrModelId, new ChatCompletionsOptions&#40;chatMessages&#41;,
* new RequestOptions&#40;&#41;.setHeader&#40;&quot;my-header&quot;, &quot;my-header-value&quot;&#41;&#41;
* .getValue&#40;&#41;
* .forEach&#40;chatCompletions -&gt; &#123;
* if &#40;CoreUtils.isNullOrEmpty&#40;chatCompletions.getChoices&#40;&#41;&#41;&#41; &#123;
* return;
* &#125;
* ChatResponseMessage delta = chatCompletions.getChoices&#40;&#41;.get&#40;0&#41;.getDelta&#40;&#41;;
* if &#40;delta.getRole&#40;&#41; != null&#41; &#123;
* System.out.println&#40;&quot;Role = &quot; + delta.getRole&#40;&#41;&#41;;
* &#125;
* if &#40;delta.getContent&#40;&#41; != null&#41; &#123;
* String content = delta.getContent&#40;&#41;;
* System.out.print&#40;content&#41;;
* &#125;
* &#125;&#41;;
* </pre>
* <!-- end com.azure.ai.openai.OpenAIClient.getChatCompletionsStream#String-ChatCompletionsOptionsMaxOverload -->
*
* @param deploymentOrModelName Specifies either the model deployment name (when using Azure OpenAI) or model name
* (when using non-Azure OpenAI) to use for this request.
* @param chatCompletionsOptions The configuration information for a chat completions request. Completions support a
* wide variety of tasks and generate text that continues from or "completes" provided prompt data.
* @param requestOptions The options to configure the HTTP request before HTTP client sends it.
* @throws IllegalArgumentException thrown if parameters fail the validation.
* @throws HttpResponseException thrown if the request is rejected by server.
* @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
* @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
* @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
* @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent.
* @return chat completions stream for the provided chat messages. Completions support a wide variety of tasks and
* generate text that continues from or "completes" provided prompt data.
*/
@ServiceMethod(returns = ReturnType.COLLECTION)
public Response<IterableStream<ChatCompletions>> getChatCompletionsStreamWithResponse(String deploymentOrModelName,
ChatCompletionsOptions chatCompletionsOptions, RequestOptions requestOptions) {
// Streaming must be enabled on the options so the service responds with server-sent events.
chatCompletionsOptions.setStream(true);
Response<BinaryData> rawResponse = getChatCompletionsWithResponse(deploymentOrModelName,
BinaryData.fromObject(chatCompletionsOptions), requestOptions);
// Turn the raw byte stream of server-sent events into individual ChatCompletions objects.
OpenAIServerSentEvents<ChatCompletions> eventParser
= new OpenAIServerSentEvents<>(rawResponse.getValue().toFluxByteBuffer(), ChatCompletions.class);
// Pair the iterable event stream with the original HTTP response metadata.
return new SimpleResponse<>(rawResponse, new IterableStream<>(eventParser.getEvents()));
}

/**
* Gets transcribed text and associated metadata from provided spoken audio file data. Audio will be transcribed in
* the written language corresponding to the language it was spoken in.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import com.azure.ai.openai.models.ChatResponseMessage;
import com.azure.core.credential.AzureKeyCredential;
import com.azure.core.util.Configuration;
import com.azure.core.util.CoreUtils;
import com.azure.core.util.IterableStream;
import com.knuddels.jtokkit.Encodings;
import com.knuddels.jtokkit.api.Encoding;
Expand Down Expand Up @@ -73,11 +74,11 @@ public static void main(String[] args) {
// }
chatCompletionsStream
.stream()
// Remove .skip(1) when using Non-Azure OpenAI API
// Note: the first chat completions can be ignored when using Azure OpenAI service which is a known service bug.
// TODO: remove .skip(1) after service fixes the issue.
.skip(1)
.forEach(chatCompletions -> {
if (CoreUtils.isNullOrEmpty(chatCompletions.getChoices())) {
return;
}

ChatResponseMessage delta = chatCompletions.getChoices().get(0).getDelta();

if (delta.getRole() != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@
import com.azure.ai.openai.models.ChatRequestMessage;
import com.azure.ai.openai.models.ChatRequestSystemMessage;
import com.azure.ai.openai.models.ChatRequestUserMessage;
import com.azure.ai.openai.models.ChatResponseMessage;
import com.azure.core.credential.AzureKeyCredential;
import com.azure.core.http.rest.RequestOptions;
import com.azure.core.util.Configuration;
import org.junit.jupiter.api.Test;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;

/**
* Code snippets for {@link OpenAIAsyncClient}
Expand All @@ -29,32 +30,35 @@ public class OpenAIAsyncClientJavaDocCodeSnippets {
* Code snippets for {@link OpenAIClient#getChatCompletionsStream(String, ChatCompletionsOptions)}
*/
@Test
public void getChatCompletionsStream() {
String deploymentOrModelId = "gpt-4-1106-preview";
public void getChatCompletionsStream() throws InterruptedException {
String deploymentOrModelId = Configuration.getGlobalConfiguration().get("OPENAI_DEPLOYMENT_OR_MODEL_ID");
List<ChatRequestMessage> chatMessages = new ArrayList<>();
chatMessages.add(new ChatRequestSystemMessage("You are a helpful assistant. You will talk like a pirate."));
chatMessages.add(new ChatRequestUserMessage("Can you help me?"));
chatMessages.add(new ChatRequestAssistantMessage("Of course, me hearty! What can I do for ye?"));
chatMessages.add(new ChatRequestUserMessage("What's the best way to train a parrot?"));

// BEGIN: com.azure.ai.openai.OpenAIAsyncClient.getChatCompletionsStream#String-ChatCompletionsOptions
openAIAsyncClient
.getChatCompletionsStream(deploymentOrModelId, new ChatCompletionsOptions(chatMessages))
.toStream()
// Remove .skip(1) when using Non-Azure OpenAI API
// Note: the first chat completions can be ignored when using Azure OpenAI service which is a known service bug.
// TODO: remove .skip(1) after service fixes the issue.
.skip(1)
.forEach(chatCompletions -> {
ChatResponseMessage delta = chatCompletions.getChoices().get(0).getDelta();
if (delta.getRole() != null) {
System.out.println("Role = " + delta.getRole());
}
if (delta.getContent() != null) {
String content = delta.getContent();
System.out.print(content);
}
});
.subscribe(
chatCompletions -> System.out.print(chatCompletions.getId()),
error -> System.err.println("There was an error getting chat completions." + error),
() -> System.out.println("Completed called getChatCompletionsStream."));
// END: com.azure.ai.openai.OpenAIAsyncClient.getChatCompletionsStream#String-ChatCompletionsOptions

// With Response Code Snippet

// BEGIN: com.azure.ai.openai.OpenAIAsyncClient.getChatCompletionsStream#String-ChatCompletionsOptionsMaxOverload
openAIAsyncClient.getChatCompletionsStreamWithResponse(deploymentOrModelId, new ChatCompletionsOptions(chatMessages),
new RequestOptions().setHeader("my-header", "my-header-value"))
.subscribe(
response -> System.out.print(response.getValue().getId()),
error -> System.err.println("There was an error getting chat completions." + error),
() -> System.out.println("Completed called getChatCompletionsStreamWithResponse."));
// END: com.azure.ai.openai.OpenAIAsyncClient.getChatCompletionsStream#String-ChatCompletionsOptionsMaxOverload

TimeUnit.SECONDS.sleep(10);
}

private OpenAIAsyncClient getOpenAIAsyncClient() {
Expand Down
Loading