Skip to content

Commit 9d22adc

Browse files
authored
Merge pull request #42 from gianni-cor/fix_cl_mtmd
fix multimodal backend
2 parents c9115b9 + 0fe8a88 commit 9d22adc

File tree

6 files changed: 15 additions & 6 deletions

common/arg.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2338,6 +2338,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
23382338
params.mmproj_use_gpu = false;
23392339
}
23402340
).set_examples(mmproj_examples).set_env("LLAMA_ARG_NO_MMPROJ_OFFLOAD"));
2341+
add_opt(common_arg(
2342+
{"--mmproj-backend"}, "NAME",
2343+
"GPU backend for multimodal projector (e.g. CUDA, Metal, Vulkan)\n"
2344+
"if not specified, will use MTMD_BACKEND_DEVICE env var or default GPU backend",
2345+
[](common_params & params, const std::string & value) {
2346+
params.mmproj_backend = value;
2347+
}
2348+
).set_examples({LLAMA_EXAMPLE_MTMD, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}));
23412349
add_opt(common_arg(
23422350
{"--image", "--audio"}, "FILE",
23432351
"path to an image or audio file. use with multimodal models, can be repeated if you have multiple files\n",

common/common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,7 @@ struct common_params {
402402
// multimodal models (see tools/mtmd)
403403
struct common_params_model mmproj;
404404
bool mmproj_use_gpu = true; // use GPU for multimodal model
405+
std::string mmproj_backend = ""; // GPU backend for multimodal model (e.g. "CUDA", "Metal", "Vulkan")
405406
bool no_mmproj = false; // explicitly disable multimodal model
406407
std::vector<std::string> image; // path to image file(s)
407408

tools/mtmd/clip.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,7 @@
2828
#include <numeric>
2929
#include <functional>
3030

31-
struct clip_logger_state g_logger_state = {
32-
GGML_LOG_LEVEL_CONT, // verbosity_thold
33-
clip_log_callback_default, // log_callback
34-
NULL // log_callback_user_data
35-
};
31+
struct clip_logger_state g_logger_state = {GGML_LOG_LEVEL_CONT, clip_log_callback_default, NULL};
3632

3733
enum ffn_op_type {
3834
FFN_GELU,
@@ -401,7 +397,7 @@ struct clip_ctx {
401397
throw std::runtime_error("failed to initialize CPU backend");
402398
}
403399
if (ctx_params.use_gpu) {
404-
auto backend_name = std::getenv("MTMD_BACKEND_DEVICE");
400+
auto backend_name = ctx_params.backend_device ? ctx_params.backend_device : std::getenv("MTMD_BACKEND_DEVICE");
405401
if (backend_name != nullptr) {
406402
backend = ggml_backend_init_by_name(backend_name, nullptr);
407403
if (!backend) {

tools/mtmd/clip.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ enum clip_modality {
2525
struct clip_context_params {
2626
bool use_gpu;
2727
enum ggml_log_level verbosity;
28+
const char * backend_device; // optional, if null will use env var or default GPU backend
2829
};
2930

3031
struct clip_init_result {

tools/mtmd/mtmd.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ mtmd_context_params mtmd_context_params_default() {
9191
params.verbosity = GGML_LOG_LEVEL_INFO;
9292
params.image_marker = MTMD_DEFAULT_IMAGE_MARKER;
9393
params.media_marker = mtmd_default_marker();
94+
params.backend_device = nullptr;
9495
return params;
9596
}
9697

@@ -152,6 +153,7 @@ struct mtmd_context {
152153
clip_context_params ctx_clip_params;
153154
ctx_clip_params.use_gpu = ctx_params.use_gpu;
154155
ctx_clip_params.verbosity = ctx_params.verbosity;
156+
ctx_clip_params.backend_device = ctx_params.backend_device;
155157
auto res = clip_init(mmproj_fname, ctx_clip_params);
156158
ctx_v = res.ctx_v;
157159
ctx_a = res.ctx_a;

tools/mtmd/mtmd.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ struct mtmd_context_params {
8282
enum ggml_log_level verbosity;
8383
const char * image_marker; // deprecated, use media_marker instead
8484
const char * media_marker;
85+
const char * backend_device; // optional GPU backend name (e.g. "CUDA", "Metal", "Vulkan"), if null will use env var or default
8586
};
8687

8788
MTMD_API const char * mtmd_default_marker(void);

0 commit comments

Comments
 (0)