server : various fixes for the prompt field in /completion (#5300)
server : fix deadlock when prompt array contains strings and numbers

server : removed an unnecessary generation when generating multi-prompts

server : removed an unnecessary assert
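
The deadlock fix hinges on how the "prompt" field is interpreted: a JSON array of strings is a multi-prompt and gets split into one sub-task per string, while an array containing any numbers is treated as (partially) tokenized input and must be queued as a single prompt. Below is a minimal sketch of that classification, not code from the commit; it assumes nlohmann::json (which server.cpp already uses) and illustrative token ids.

```cpp
// Sketch only: how the patched server decides whether a "prompt" array can be
// split into sub-prompts. Uses nlohmann::json (already a server dependency);
// the payloads and token ids below are illustrative, not taken from the commit.
#include <iostream>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

// True if any element is a number, i.e. the array contains token ids and
// must be queued as one prompt instead of being split per element.
static bool prompt_contains_numbers(const json & prompt) {
    for (const auto & e : prompt) {
        if (e.is_number()) {
            return true;
        }
    }
    return false;
}

int main() {
    json multi = json::parse(R"({"prompt": ["Hello", "Bonjour"]})");    // two independent prompts
    json mixed = json::parse(R"({"prompt": ["Hello", 15043, 3186]})");  // string + token ids

    std::cout << prompt_contains_numbers(multi["prompt"]) << '\n';  // 0 -> split into sub-tasks
    std::cout << prompt_contains_numbers(mixed["prompt"]) << '\n';  // 1 -> queue as a single task
}
```

With the patch, only the all-strings case goes through split_multiprompt_task(); the mixed case is posted directly to queue_tasks, which handles it fine.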
Niall- authored Feb 6, 2024
1 parent 906cff5 commit 4ffc7a1
Showing 1 changed file with 27 additions and 7 deletions.
34 changes: 27 additions & 7 deletions examples/server/server.cpp
@@ -1163,13 +1163,30 @@ struct llama_server_context
         task.multitask_id = multitask_id;

         // when a completion task's prompt array is not a singleton, we split it into multiple requests
-        if (task.data.count("prompt") && task.data.at("prompt").size() > 1)
-        {
-            split_multiprompt_task(task_id, task);
-        }
-
-        // otherwise, it's a single-prompt task, we actually queue it
-        queue_tasks.post(task);
+        // if there's numbers in the prompt array it will be treated as an array of tokens
+        if (task.data.count("prompt") != 0 && task.data.at("prompt").size() > 1) {
+            bool numbers = false;
+            for (const auto& e : task.data.at("prompt")) {
+                if (e.is_number()) {
+                    numbers = true;
+                    break;
+                }
+            }
+
+            // NOTE: split_multiprompt_task() does not handle a mix of strings and numbers,
+            //       it will completely stall the server. I don't know where the bug for this is.
+            //
+            //       if there are numbers, it needs to be treated like a single prompt,
+            //       queue_tasks handles a mix of strings and numbers just fine.
+            if (numbers) {
+                queue_tasks.post(task);
+            } else {
+                split_multiprompt_task(task_id, task);
+            }
+        } else {
+            queue_tasks.post(task);
+        }
     }

     // for multiple images processing
@@ -1251,7 +1268,10 @@ struct llama_server_context
     void split_multiprompt_task(int multitask_id, task_server& multiprompt_task)
     {
         int prompt_count = multiprompt_task.data.at("prompt").size();
-        assert(prompt_count > 1);
+        if (prompt_count <= 1) {
+            send_error(multiprompt_task, "error while handling multiple prompts");
+            return;
+        }

         // generate all the ID for subtask
         std::vector<int> subtask_ids(prompt_count);
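
The second hunk swaps an assert for an explicit error path. A rough sketch of the rationale follows, with stand-in types, since the server's task_server and send_error are not shown in full here.

```cpp
// Sketch only: why the assert was replaced. assert() compiles away in release
// builds (NDEBUG), so a prompt array with 0 or 1 elements reaching the split
// function would abort the process in debug builds and go unchecked in release.
// Reporting an error and returning keeps the server running either way.
// report_error() is a stand-in for the server's send_error(), which takes the
// offending task and a message.
#include <cstdio>
#include <string>
#include <vector>

static void report_error(const std::string & msg) {
    std::fprintf(stderr, "error: %s\n", msg.c_str());
}

static bool split_multiprompt(const std::vector<std::string> & prompts) {
    if (prompts.size() <= 1) {
        report_error("error while handling multiple prompts");
        return false;  // nothing to split; the caller answers the request with an error
    }
    // ... generate one subtask per prompt and queue them ...
    return true;
}
```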
