diff --git a/common/arg.cpp b/common/arg.cpp
index f53b4798105..1ffaf704858 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -446,6 +446,12 @@ bool common_params_handle_models(common_params & params, llama_example curr_ex)
     opts.download_mtp    = spec_type_draft_mtp;
     opts.download_mmproj = !params.no_mmproj;
 
+    // sub-models (draft, mmproj, vocoder) are explicitly specified by the user,
+    // so we should not auto-discover mtp/mmproj siblings for them
+    common_download_opts sub_opts = opts;
+    sub_opts.download_mtp    = false;
+    sub_opts.download_mmproj = false;
+
     try {
         auto res = common_params_handle_model(params.model, opts);
         if (params.no_mmproj) {
@@ -457,7 +463,7 @@ bool common_params_handle_models(common_params & params, llama_example curr_ex)
         // only download mmproj if the current example is using it
         for (const auto & ex : mmproj_examples) {
             if (curr_ex == ex) {
-                common_params_handle_model(params.mmproj, opts);
+                common_params_handle_model(params.mmproj, sub_opts);
                 break;
             }
         }
@@ -470,8 +476,8 @@ bool common_params_handle_models(common_params & params, llama_example curr_ex)
             params.speculative.draft.mparams.url.empty()) {
             params.speculative.draft.mparams.path = res.mtp.path;
         }
-        common_params_handle_model(params.speculative.draft.mparams, opts);
-        common_params_handle_model(params.vocoder.model,             opts);
+        common_params_handle_model(params.speculative.draft.mparams, sub_opts);
+        common_params_handle_model(params.vocoder.model,             sub_opts);
         return true;
     } catch (const common_skip_download_exception &) {
         return false;