@@ -440,6 +440,10 @@ class StableDiffusionGGML {
440440 diffusion_model->alloc_params_buffer ();
441441 diffusion_model->get_param_tensors (tensors);
442442
443+ if (sd_version_is_unet_edit (version)) {
444+ vae_decode_only = false ;
445+ }
446+
443447 if (high_noise_diffusion_model) {
444448 high_noise_diffusion_model->alloc_params_buffer ();
445449 high_noise_diffusion_model->get_param_tensors (tensors);
@@ -2300,23 +2304,36 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
23002304 init_latent = generate_init_latent (sd_ctx, work_ctx, width, height);
23012305 }
23022306
2307+ sd_guidance_params_t guidance = sd_img_gen_params->sample_params .guidance ;
2308+ std::vector<sd_image_t *> ref_images;
2309+ for (int i = 0 ; i < sd_img_gen_params->ref_images_count ; i++) {
2310+ ref_images.push_back (&sd_img_gen_params->ref_images [i]);
2311+ }
2312+
2313+ std::vector<uint8_t > empty_image_data; // pixel storage for the placeholder reference; declared at this scope so it outlives the pointer kept in ref_images
2314+ sd_image_t empty_image = {(uint32_t )width, (uint32_t )height, 3 , nullptr }; // placeholder reference sized like the output; data is attached below only when needed
2315+ if (ref_images.empty () && sd_version_is_unet_edit (sd_ctx->sd ->version ))
2316+ {
2317+ LOG_WARN (" This model needs at least one reference image; using an empty reference" );
2318+ empty_image_data.resize ((size_t )width * height * 3 ); // resize, not reserve: reserve leaves size() == 0, so data() would expose no valid (zeroed) pixels to sd_image_to_tensor
2319+ empty_image.data = empty_image_data.data (); // attach the zero-filled buffer before the image is handed out
2320+ ref_images.push_back (&empty_image);
2321+ guidance.img_cfg = 0 .f ; // zero img_cfg on the local guidance copy; presumably so the blank reference does not steer sampling (confirm)
2322+ }
2323+
23032324 if (sd_img_gen_params->ref_images_count > 0 ) {
23042325 LOG_INFO (" EDIT mode" );
23052326 }
2306- else if (sd_ctx->sd ->version == VERSION_SD1_PIX2PIX || sd_ctx->sd ->version == VERSION_SDXL_PIX2PIX) {
2307- LOG_ERROR (" This model needs at least one reference image" );
2308- return NULL ;
2309- }
23102327
23112328 std::vector<struct ggml_tensor *> ref_latents;
2312- for (int i = 0 ; i < sd_img_gen_params-> ref_images_count ; i++) {
2329+ for (int i = 0 ; i < ref_images. size () ; i++) {
23132330 ggml_tensor* img = ggml_new_tensor_4d (work_ctx,
23142331 GGML_TYPE_F32,
2315- sd_img_gen_params-> ref_images [i]. width ,
2316- sd_img_gen_params-> ref_images [i]. height ,
2332+ ref_images[i]-> width ,
2333+ ref_images[i]-> height ,
23172334 3 ,
23182335 1 );
2319- sd_image_to_tensor (sd_img_gen_params-> ref_images [i]. data , img);
2336+ sd_image_to_tensor (ref_images[i]-> data , img);
23202337
23212338 ggml_tensor* latent = NULL ;
23222339 if (sd_ctx->sd ->use_tiny_autoencoder ) {
@@ -2349,7 +2366,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
23492366 SAFE_STR (sd_img_gen_params->prompt ),
23502367 SAFE_STR (sd_img_gen_params->negative_prompt ),
23512368 sd_img_gen_params->clip_skip ,
2352- sd_img_gen_params-> sample_params . guidance ,
2369+ guidance,
23532370 sd_img_gen_params->sample_params .eta ,
23542371 width,
23552372 height,
0 commit comments