Skip to content

Commit d765b95

Browse files
committed
perf: make ggml_conv_2d faster
1 parent 008d80a commit d765b95

File tree

2 files changed

+13
-1
lines changed

2 files changed

+13
-1
lines changed

ggml

stable-diffusion.cpp

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3256,6 +3256,10 @@ class StableDiffusionGGML {
3256 3256
}
3257 3257
}
3258 3258
}
3259+
3260+
#ifdef GGML_PERF
3261+
ggml_graph_print(&diffusion_graph);
3262+
#endif
3259 3263
int64_t t1 = ggml_time_ms();
3260 3264
LOG_INFO("step %d sampling completed, taking %.2fs", i + 1, (t1 - t0) * 1.0f / 1000);
3261 3265
LOG_DEBUG("diffusion graph use %.2fMB runtime memory: static %.2fMB, dynamic %.2fMB",
@@ -3345,6 +3349,10 @@ class StableDiffusionGGML {
3345 3349
int64_t t0 = ggml_time_ms();
3346 3350
ggml_graph_compute_with_ctx(ctx, &vae_graph, n_threads);
3347 3351
int64_t t1 = ggml_time_ms();
3352+
3353+
#ifdef GGML_PERF
3354+
ggml_graph_print(&vae_graph);
3355+
#endif
3348 3356
LOG_DEBUG("computing vae graph completed, taking %.2fs", (t1 - t0) * 1.0f / 1000);
3349 3357

3350 3358
result = ggml_dup_tensor(res_ctx, moments);
@@ -3470,6 +3478,10 @@ class StableDiffusionGGML {
3470 3478
int64_t t0 = ggml_time_ms();
3471 3479
ggml_graph_compute_with_ctx(ctx, &vae_graph, n_threads);
3472 3480
int64_t t1 = ggml_time_ms();
3481+
3482+
#ifdef GGML_PERF
3483+
ggml_graph_print(&vae_graph);
3484+
#endif
3473 3485
LOG_DEBUG("computing vae graph completed, taking %.2fs", (t1 - t0) * 1.0f / 1000);
3474 3486

3475 3487
result_img = ggml_dup_tensor(res_ctx, img);

0 commit comments

Comments
 (0)