Commit b379f9d

Revert "amd multigpu full layer offload w/o vram scratch"
This reverts commit 9adfc8e.
YellowRoseCx committed Jul 24, 2023
1 parent: 9adfc8e · commit: b379f9d
Showing 1 changed file with 0 additions and 14 deletions.
14 changes: 0 additions & 14 deletions llama.cpp
@@ -1224,32 +1224,18 @@ static void llama_model_load_internal(
 
 #ifdef GGML_USE_CUBLAS
     const int max_backend_supported_layers = hparams.n_layer + 3;
-#if defined(GGML_USE_HIPBLAS)
-    const int max_offloadable_layers = low_vram ? hparams.n_layer + 3 : hparams.n_layer + 3;
-#else
     const int max_offloadable_layers = low_vram ? hparams.n_layer + 1 : hparams.n_layer + 3;
-#endif
     if (n_gpu_layers > (int) hparams.n_layer + 1) {
         if (low_vram) {
-#if defined(GGML_USE_HIPBLAS)
-            fprintf(stderr, "%s: offloading v cache to GPU\n", __func__);
-            vram_kv_cache += MEM_REQ_KV_SELF().at(model.type) / 2;
-#else
             fprintf(stderr, "%s: cannot offload v cache to GPU due to low VRAM option\n", __func__);
-#endif
         } else {
             fprintf(stderr, "%s: offloading v cache to GPU\n", __func__);
             vram_kv_cache += MEM_REQ_KV_SELF().at(model.type) / 2;
         }
     }
     if (n_gpu_layers > (int) hparams.n_layer + 2) {
         if (low_vram) {
-#if defined(GGML_USE_HIPBLAS)
-            fprintf(stderr, "%s: offloading k cache to GPU\n", __func__);
-            vram_kv_cache += MEM_REQ_KV_SELF().at(model.type) / 2;
-#else
             fprintf(stderr, "%s: cannot offload k cache to GPU due to low VRAM option\n", __func__);
-#endif
         } else {
             fprintf(stderr, "%s: offloading k cache to GPU\n", __func__);
             vram_kv_cache += MEM_REQ_KV_SELF().at(model.type) / 2;
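For context, a minimal standalone C++ sketch of how the KV-cache offload decision behaves once this revert is applied: with the GGML_USE_HIPBLAS special case removed, the low_vram option again caps offloading at n_layer + 1 layers and keeps both halves of the KV cache in host memory on every CUDA/HIP build. The report_kv_offload helper, the HParams stand-in, the summary line, and the byte figures below are illustrative assumptions for this sketch, not code from llama.cpp.

#include <cstdio>
#include <cstdint>
#include <cstddef>

// Illustrative stand-in; the real values come from llama.cpp's hparams
// and the MEM_REQ_KV_SELF() table.
struct HParams { uint32_t n_layer; };

// Post-revert rule: low_vram limits offloading to n_layer + 1 layers, so the
// V cache (slot n_layer + 2) and K cache (slot n_layer + 3) stay in host RAM.
static void report_kv_offload(const HParams & hparams, int n_gpu_layers,
                              bool low_vram, size_t kv_self_bytes) {
    const int max_backend_supported_layers = hparams.n_layer + 3;
    const int max_offloadable_layers = low_vram ? hparams.n_layer + 1 : hparams.n_layer + 3;

    size_t vram_kv_cache = 0;
    if (n_gpu_layers > (int) hparams.n_layer + 1) {
        if (low_vram) {
            fprintf(stderr, "%s: cannot offload v cache to GPU due to low VRAM option\n", __func__);
        } else {
            fprintf(stderr, "%s: offloading v cache to GPU\n", __func__);
            vram_kv_cache += kv_self_bytes / 2;
        }
    }
    if (n_gpu_layers > (int) hparams.n_layer + 2) {
        if (low_vram) {
            fprintf(stderr, "%s: cannot offload k cache to GPU due to low VRAM option\n", __func__);
        } else {
            fprintf(stderr, "%s: offloading k cache to GPU\n", __func__);
            vram_kv_cache += kv_self_bytes / 2;
        }
    }
    const int offloaded = n_gpu_layers < max_offloadable_layers ? n_gpu_layers : max_offloadable_layers;
    fprintf(stderr, "%s: offloaded %d/%d layers, KV cache in VRAM: %zu MB\n",
            __func__, offloaded, max_backend_supported_layers,
            vram_kv_cache / (1024 * 1024));
}

int main() {
    const HParams hparams = { 32 };                     // e.g. a 7B LLaMA model
    const size_t kv_self_bytes = 1024u * 1024 * 1024;   // assumed 1 GiB KV cache
    report_kv_offload(hparams, 35, /*low_vram=*/true,  kv_self_bytes);
    report_kv_offload(hparams, 35, /*low_vram=*/false, kv_self_bytes);
    return 0;
}

With these assumed inputs, the low_vram call reports that neither cache half can be offloaded (0 MB in VRAM), while the second call offloads both halves (1024 MB); the reverted commit had made HIP builds behave like the second call even when low_vram was set.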
