@@ -60,6 +60,7 @@ struct SDParams {
6060 std::string clip_vision_path;
6161 std::string t5xxl_path;
6262 std::string qwen2vl_path;
63+ std::string qwen2vl_vision_path;
6364 std::string diffusion_model_path;
6465 std::string high_noise_diffusion_model_path;
6566 std::string vae_path;
@@ -146,6 +147,7 @@ void print_params(SDParams params) {
146147 printf (" clip_vision_path: %s\n " , params.clip_vision_path .c_str ());
147148 printf (" t5xxl_path: %s\n " , params.t5xxl_path .c_str ());
148149 printf (" qwen2vl_path: %s\n " , params.qwen2vl_path .c_str ());
150+ printf (" qwen2vl_vision_path: %s\n " , params.qwen2vl_vision_path .c_str ());
149151 printf (" diffusion_model_path: %s\n " , params.diffusion_model_path .c_str ());
150152 printf (" high_noise_diffusion_model_path: %s\n " , params.high_noise_diffusion_model_path .c_str ());
151153 printf (" vae_path: %s\n " , params.vae_path .c_str ());
@@ -218,6 +220,7 @@ void print_usage(int argc, const char* argv[]) {
218220 printf (" --clip_vision path to the clip-vision encoder\n " );
219221 printf (" --t5xxl path to the t5xxl text encoder\n " );
220222 printf (" --qwen2vl path to the qwen2vl text encoder\n " );
223+ printf (" --qwen2vl_vision path to the qwen2vl vit\n " );
221224 printf (" --vae [VAE] path to vae\n " );
222225 printf (" --taesd [TAESD_PATH] path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)\n " );
223226 printf (" --control-net [CONTROL_PATH] path to control net model\n " );
@@ -488,6 +491,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
488491 {" " , " --clip_vision" , " " , &params.clip_vision_path },
489492 {" " , " --t5xxl" , " " , &params.t5xxl_path },
490493 {" " , " --qwen2vl" , " " , &params.qwen2vl_path },
494+ {" " , " --qwen2vl_vision" , " " , &params.qwen2vl_vision_path },
491495 {" " , " --diffusion-model" , " " , &params.diffusion_model_path },
492496 {" " , " --high-noise-diffusion-model" , " " , &params.high_noise_diffusion_model_path },
493497 {" " , " --vae" , " " , &params.vae_path },
@@ -947,7 +951,7 @@ std::string get_image_params(SDParams params, int64_t seed) {
947951 parameter_string += " " + std::string (sd_schedule_name (params.sample_params .scheduler ));
948952 }
949953 parameter_string += " , " ;
950- for (const auto & te : {params.clip_l_path , params.clip_g_path , params.t5xxl_path , params.qwen2vl_path }) {
954+ for (const auto & te : {params.clip_l_path , params.clip_g_path , params.t5xxl_path , params.qwen2vl_path , params. qwen2vl_vision_path }) {
951955 if (!te.empty ()) {
952956 parameter_string += " TE: " + sd_basename (te) + " , " ;
953957 }
@@ -1322,6 +1326,7 @@ int main(int argc, const char* argv[]) {
13221326 params.clip_vision_path .c_str (),
13231327 params.t5xxl_path .c_str (),
13241328 params.qwen2vl_path .c_str (),
1329+ params.qwen2vl_vision_path .c_str (),
13251330 params.diffusion_model_path .c_str (),
13261331 params.high_noise_diffusion_model_path .c_str (),
13271332 params.vae_path .c_str (),
0 commit comments