diff --git a/src/llama-model.cpp b/src/llama-model.cpp index c98cb27e4d4..1f442d8a322 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -1636,7 +1636,8 @@ const float * llama_model::tensor_split() const { } uint32_t llama_model::n_gpu_layers() const { - return params.n_gpu_layers >= 0 ? params.n_gpu_layers : hparams.n_layer() + 1; + // note: a negative params.n_gpu_layers falls back to hparams.n_layer_all, plus 1 for the "output" layer + return params.n_gpu_layers >= 0 ? params.n_gpu_layers : hparams.n_layer_all + 1; } llama_split_mode llama_model::split_mode() const {