@@ -59,6 +59,7 @@ bool llama_supports_mlock(void) {
 
 bool llama_supports_gpu_offload(void) {
     return ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU) != nullptr ||
+           ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU) != nullptr ||
           llama_supports_rpc();
 }
 
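With this change, an integrated GPU (GGML_BACKEND_DEVICE_TYPE_IGPU) is enough for llama_supports_gpu_offload() to return true. A minimal caller-side sketch, not part of this diff (the file name and main() are illustrative; only the public llama.h function is assumed):

// check_offload.cpp - hypothetical example, assumes llama.h from this build
#include "llama.h"
#include <cstdio>

int main() {
    // after this change: true when a discrete GPU, an integrated GPU (IGPU),
    // or an RPC backend device is available
    if (llama_supports_gpu_offload()) {
        std::printf("GPU offload is supported\n");
    } else {
        std::printf("no GPU offload, running on CPU\n");
    }
    return 0;
}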
@@ -184,8 +185,13 @@ static struct llama_model * llama_model_load_from_file_impl(
             model->devices.push_back(*dev);
         }
     } else {
+        // default device selection
+
+        // build list of available devices
+        std::vector<ggml_backend_dev_t> gpus;
+        std::vector<ggml_backend_dev_t> igpus;
         std::vector<ggml_backend_dev_t> rpc_servers;
-        // use all available devices
+
         for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
             ggml_backend_dev_t dev = ggml_backend_dev_get(i);
             switch (ggml_backend_dev_type(dev)) {
@@ -194,19 +200,51 @@ static struct llama_model * llama_model_load_from_file_impl(
                     // skip CPU backends since they are handled separately
                     break;
 
-                case GGML_BACKEND_DEVICE_TYPE_GPU:
+                case GGML_BACKEND_DEVICE_TYPE_GPU: {
                     ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
                     if (ggml_backend_reg_name(reg) == std::string("RPC")) {
                         rpc_servers.push_back(dev);
                     } else {
-                        model->devices.push_back(dev);
+                        // check if there is already a GPU with the same device id
+                        ggml_backend_dev_props props;
+                        ggml_backend_dev_get_props(dev, &props);
+                        auto it = std::find_if(gpus.begin(), gpus.end(), [&props](ggml_backend_dev_t d) {
+                            ggml_backend_dev_props d_props;
+                            ggml_backend_dev_get_props(d, &d_props);
+                            if (props.device_id && d_props.device_id) {
+                                return strcmp(props.device_id, d_props.device_id) == 0;
+                            }
+                            return false;
+                        });
+
+                        if (it != gpus.end()) {
+                            LLAMA_LOG_INFO("%s: skipping device %s (%s) with id %s - already using device %s (%s) with the same id\n",
+                                __func__,
+                                ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
+                                props.device_id ? props.device_id : "unknown id",
+                                ggml_backend_dev_name(*it), ggml_backend_dev_description(*it));
+                        } else {
+                            gpus.push_back(dev);
+                        }
                     }
                     break;
+                }
+
+                case GGML_BACKEND_DEVICE_TYPE_IGPU:
+                    igpus.push_back(dev);
+                    break;
             }
         }
-        // add RPC servers at the front of the list
-        if (!rpc_servers.empty()) {
-            model->devices.insert(model->devices.begin(), rpc_servers.begin(), rpc_servers.end());
+
+        // add RPC servers at the front of the list to minimize network transfers
+        model->devices.insert(model->devices.begin(), rpc_servers.begin(), rpc_servers.end());
+
+        // add GPUs
+        model->devices.insert(model->devices.end(), gpus.begin(), gpus.end());
+
+        // add integrated GPUs only if no other devices were found
+        if (model->devices.empty()) {
+            model->devices.insert(model->devices.end(), igpus.begin(), igpus.end());
         }
     }
 
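The new GPU case above drops devices that report the same device id as one already selected, e.g. the same physical GPU exposed by two different backends. A standalone sketch of the same find_if/strcmp pattern, using a hypothetical Device struct rather than the real ggml_backend_dev_t handles:

// standalone illustration of the dedup-by-id pattern; Device is a stand-in,
// not a ggml type
#include <algorithm>
#include <cstring>
#include <vector>

struct Device {
    const char * name;
    const char * device_id; // may be nullptr when the backend cannot report one
};

// add dev to gpus unless a device with the same non-null id is already present
static void add_unique(std::vector<Device> & gpus, const Device & dev) {
    auto it = std::find_if(gpus.begin(), gpus.end(), [&dev](const Device & d) {
        if (dev.device_id && d.device_id) {
            return strcmp(dev.device_id, d.device_id) == 0;
        }
        return false; // devices without an id are never treated as duplicates
    });
    if (it == gpus.end()) {
        gpus.push_back(dev);
    }
}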
@@ -227,9 +265,12 @@ static struct llama_model * llama_model_load_from_file_impl(
     }
 
     for (auto * dev : model->devices) {
-        size_t free, total; // NOLINT
-        ggml_backend_dev_memory(dev, &free, &total);
-        LLAMA_LOG_INFO("%s: using device %s (%s) - %zu MiB free\n", __func__, ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), free/1024/1024);
+        ggml_backend_dev_props props;
+        ggml_backend_dev_get_props(dev, &props);
+        LLAMA_LOG_INFO("%s: using device %s (%s) (%s) - %zu MiB free\n", __func__,
+            ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
+            props.device_id ? props.device_id : "unknown id",
+            props.memory_free/1024/1024);
     }
 
     const int status = llama_model_load(path_model, splits, *model, params);
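The logging now pulls both the free memory and the device id from ggml_backend_dev_props rather than calling ggml_backend_dev_memory(). A minimal sketch of enumerating devices this way, assuming the ggml-backend.h header and ggml_backend_load_all() from the same tree; aside from those two names, only calls already used in this diff appear:

// list_devices.cpp - illustrative only; assumes ggml-backend.h and
// ggml_backend_load_all() are available in this source tree
#include "ggml-backend.h"
#include <cstdio>

int main() {
    ggml_backend_load_all(); // load dynamic backends so devices are registered

    for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);

        ggml_backend_dev_props props;
        ggml_backend_dev_get_props(dev, &props);

        std::printf("%s (%s) (%s) - %zu MiB free\n",
            ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
            props.device_id ? props.device_id : "unknown id",
            props.memory_free/1024/1024);
    }
    return 0;
}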