@@ -1552,11 +1552,11 @@ struct server_queue {
15521552 std::condition_variable condition_tasks;
15531553
15541554 // callback functions
1555- std::function<void (server_task)> callback_new_task;
1556- std::function<void (void )> callback_update_slots;
1555+ std::function<void (server_task& )> callback_new_task;
1556+ std::function<void (void )> callback_update_slots;
15571557
15581558 // Add a new task to the end of the queue
1559- int post (server_task task, bool front = false ) {
1559+ int post (server_task & task, bool front = false ) {
15601560 std::unique_lock<std::mutex> lock (mutex_tasks);
15611561 GGML_ASSERT (task.id != -1 );
15621562 // if this is a cancel task, make sure to clean up pending tasks
@@ -1596,7 +1596,7 @@ struct server_queue {
15961596 }
15971597
15981598 // Add a new task, but defer until one slot is available
1599- void defer (server_task task) {
1599+ void defer (server_task & task) {
16001600 std::unique_lock<std::mutex> lock (mutex_tasks);
16011601 QUE_DBG (" defer task, id = %d\n " , task.id );
16021602 queue_tasks_deferred.push_back (std::move (task));
@@ -1611,7 +1611,7 @@ struct server_queue {
16111611 }
16121612
16131613 // Register function to process a new task
1614- void on_new_task (std::function<void (server_task)> callback) {
1614+ void on_new_task (std::function<void (server_task& )> callback) {
16151615 callback_new_task = std::move (callback);
16161616 }
16171617
@@ -1660,12 +1660,12 @@ struct server_queue {
16601660 lock.unlock ();
16611661 break ;
16621662 }
1663- server_task task = queue_tasks.front ();
1663+ server_task task = std::move ( queue_tasks.front () );
16641664 queue_tasks.pop_front ();
16651665 lock.unlock ();
16661666
16671667 QUE_DBG (" processing task, id = %d\n " , task.id );
1668- callback_new_task (std::move ( task) );
1668+ callback_new_task (task);
16691669 }
16701670
16711671 // all tasks in the current loop are processed, slots data is now ready
@@ -2004,7 +2004,7 @@ struct server_context {
20042004
20052005 slot.reset ();
20062006
2007- slots.push_back (slot);
2007+ slots.push_back (std::move ( slot) );
20082008 }
20092009
20102010 default_generation_settings_for_props = slots[0 ].to_json ();
@@ -2547,7 +2547,7 @@ struct server_context {
25472547 server_task task (SERVER_TASK_TYPE_CANCEL);
25482548 task.id_target = id_task;
25492549 queue_results.remove_waiting_task_id (id_task);
2550- cancel_tasks.push_back (task);
2550+ cancel_tasks.push_back (std::move ( task) );
25512551 }
25522552 // push to beginning of the queue, so it has highest priority
25532553 queue_tasks.post (cancel_tasks, true );
@@ -2637,7 +2637,7 @@ struct server_context {
26372637 // Functions to process the task
26382638 //
26392639
2640- void process_single_task (server_task task) {
2640+ void process_single_task (server_task & task) {
26412641 switch (task.type ) {
26422642 case SERVER_TASK_TYPE_COMPLETION:
26432643 case SERVER_TASK_TYPE_INFILL:
@@ -3965,7 +3965,7 @@ int main(int argc, char ** argv) {
39653965 task.params .oaicompat_cmpl_id = completion_id;
39663966 // oaicompat_model is already populated by params_from_json_cmpl
39673967
3968- tasks.push_back (task);
3968+ tasks.push_back (std::move ( task) );
39693969 }
39703970 } catch (const std::exception & e) {
39713971 res_error (res, format_error_response (e.what (), ERROR_TYPE_INVALID_REQUEST));
@@ -4280,7 +4280,7 @@ int main(int argc, char ** argv) {
42804280 // OAI-compat
42814281 task.params .oaicompat = oaicompat;
42824282
4283- tasks.push_back (task);
4283+ tasks.push_back (std::move ( task) );
42844284 }
42854285
42864286 ctx_server.queue_results .add_waiting_tasks (tasks);
@@ -4376,7 +4376,7 @@ int main(int argc, char ** argv) {
43764376 task.id = ctx_server.queue_tasks .get_new_id ();
43774377 task.index = i;
43784378 task.prompt_tokens = format_rerank (ctx_server.vocab , tokenized_query, tokenized_docs[i]);
4379- tasks.push_back (task);
4379+ tasks.push_back (std::move ( task) );
43804380 }
43814381
43824382 ctx_server.queue_results .add_waiting_tasks (tasks);
@@ -4582,7 +4582,7 @@ int main(int argc, char ** argv) {
45824582 common_chat_templates_source (ctx_server.chat_templates .get ()),
45834583 common_chat_format_example (ctx_server.chat_templates .get (), ctx_server.params_base .use_jinja ).c_str ());
45844584
4585- ctx_server.queue_tasks .on_new_task ([&ctx_server](const server_task & task) {
4585+ ctx_server.queue_tasks .on_new_task ([&ctx_server](server_task & task) {
45864586 ctx_server.process_single_task (task);
45874587 });
45884588
0 commit comments