fix: Fix missing handling for AbortSignal in inference client
MohamedBassem committed Feb 1, 2025
1 parent fd7011a commit a698aea
Showing 1 changed file with 53 additions and 28 deletions.
81 changes: 53 additions & 28 deletions packages/shared/inference.ts
@@ -67,13 +67,18 @@ class OpenAIInferenceClient implements InferenceClient {
...defaultInferenceOptions,
..._opts,
};
const chatCompletion = await this.openAI.chat.completions.create({
messages: [{ role: "user", content: prompt }],
model: serverConfig.inference.textModel,
response_format: optsWithDefaults.json
? { type: "json_object" }
: undefined,
});
const chatCompletion = await this.openAI.chat.completions.create(
{
messages: [{ role: "user", content: prompt }],
model: serverConfig.inference.textModel,
response_format: optsWithDefaults.json
? { type: "json_object" }
: undefined,
},
{
signal: optsWithDefaults.abortSignal,
},
);

const response = chatCompletion.choices[0].message.content;
if (!response) {
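
For reference, this is how a caller could now cancel an in-flight text inference through the new abortSignal plumbing. A minimal sketch; the method name inferFromText, the inferenceClient instance, and the result shape are assumptions based on this diff, not confirmed by the visible hunks:

// Hypothetical caller-side usage of the new abortSignal option.
const controller = new AbortController();
// Give up on the completion if it takes longer than 30 seconds.
const timeout = setTimeout(() => controller.abort(), 30_000);
try {
  const result = await inferenceClient.inferFromText("Summarize this page", {
    abortSignal: controller.signal,
  });
  console.log(result.response);
} finally {
  clearTimeout(timeout);
}
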
@@ -92,28 +97,33 @@ class OpenAIInferenceClient implements InferenceClient {
...defaultInferenceOptions,
..._opts,
};
    const chatCompletion = await this.openAI.chat.completions.create({
      model: serverConfig.inference.imageModel,
      response_format: optsWithDefaults.json
        ? { type: "json_object" }
        : undefined,
      messages: [
        {
          role: "user",
          content: [
            { type: "text", text: prompt },
            {
              type: "image_url",
              image_url: {
                url: `data:${contentType};base64,${image}`,
                detail: "low",
              },
            },
          ],
        },
      ],
      max_tokens: 2000,
    });
    const chatCompletion = await this.openAI.chat.completions.create(
      {
        model: serverConfig.inference.imageModel,
        response_format: optsWithDefaults.json
          ? { type: "json_object" }
          : undefined,
        messages: [
          {
            role: "user",
            content: [
              { type: "text", text: prompt },
              {
                type: "image_url",
                image_url: {
                  url: `data:${contentType};base64,${image}`,
                  detail: "low",
                },
              },
            ],
          },
        ],
        max_tokens: 2000,
      },
      {
        signal: optsWithDefaults.abortSignal,
      },
    );

const response = chatCompletion.choices[0].message.content;
if (!response) {
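
Both OpenAI methods merge the caller's options over defaultInferenceOptions before reading abortSignal. A rough sketch of the options shape this implies; only the json and abortSignal fields are visible in the diff, everything else here is an assumption:

// Sketch of the inference options implied by the hunks above.
interface InferenceOptions {
  json?: boolean;            // ask the model for a JSON-formatted response
  abortSignal?: AbortSignal; // cancels the underlying HTTP request when aborted
}

// Assumed defaults; the real defaultInferenceOptions is not shown in this diff.
const defaultInferenceOptions: InferenceOptions = {
  json: true,
};

function withDefaults(_opts?: InferenceOptions): InferenceOptions {
  return { ...defaultInferenceOptions, ..._opts };
}
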
@@ -156,6 +166,14 @@ class OllamaInferenceClient implements InferenceClient {
...defaultInferenceOptions,
..._opts,
};

let newAbortSignal = undefined;
if (optsWithDefaults.abortSignal) {
newAbortSignal = AbortSignal.any([optsWithDefaults.abortSignal]);
newAbortSignal.onabort = () => {
this.ollama.abort();
};
}
const chatCompletion = await this.ollama.chat({
model: model,
format: optsWithDefaults.json ? "json" : undefined,
@@ -182,13 +200,20 @@
}
}
} catch (e) {
if (e instanceof Error && e.name === "AbortError") {
throw e;
}
// There seems to be a bug in ollama where you can get a successful response, but an error is still thrown.
// Using stream + accumulating the response so far is a workaround.
// https://github.com/ollama/ollama-js/issues/72
totalTokens = NaN;
logger.warn(
`Got an exception from ollama, will still attempt to deserialize the response we got so far: ${e}`,
);
} finally {
if (newAbortSignal) {
newAbortSignal.onabort = null;
}
}

return { response, totalTokens };
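
Unlike the OpenAI SDK, the Ollama client takes no per-request signal, so the hunks above bridge the caller's AbortSignal to the client-wide ollama.abort() and detach the handler in finally. A standalone sketch of that bridging pattern, with client.abort() standing in for this.ollama.abort() (an assumption about the client API, not part of the commit):

// Generic signal-bridging helper; an illustration only.
async function runWithAbort<T>(
  client: { abort: () => void },
  work: () => Promise<T>,
  abortSignal?: AbortSignal,
): Promise<T> {
  // Derive a new signal so the handler can be removed afterwards without
  // touching the caller's signal or any of its other listeners.
  const derived = abortSignal ? AbortSignal.any([abortSignal]) : undefined;
  if (derived) {
    derived.onabort = () => client.abort();
  }
  try {
    return await work();
  } finally {
    if (derived) {
      derived.onabort = null;
    }
  }
}

Usage would look roughly like runWithAbort(this.ollama, () => this.ollama.chat({ /* ... */ }), optsWithDefaults.abortSignal).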