@@ -54,8 +54,10 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
54
54
}
55
55
56
56
layers := ggml .Tensors ().Layers ()
57
- // add one layer worth of memorr as a buffer
58
- memoryMinimum += layers ["blk.0" ].size ()
57
+ // add one layer worth of memory as a buffer
58
+ if blk0 , ok := layers ["blk.0" ]; ok {
59
+ memoryMinimum += blk0 .size ()
60
+ }
59
61
60
62
// fp16 k,v = (1 (k) + 1 (v)) * sizeof(float16) * n_ctx * n_layer * n_embd / n_head * n_head_kv
61
63
var kv uint64 = 2 * 2 * uint64 (opts .NumCtx ) * ggml .KV ().BlockCount () * ggml .KV ().EmbeddingLength () / ggml .KV ().HeadCount () * ggml .KV ().HeadCountKV ()
@@ -102,15 +104,17 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
102
104
103
105
var layerCount int
104
106
for i := 0 ; i < int (ggml .KV ().BlockCount ()); i ++ {
105
- memoryLayer := layers [fmt .Sprintf ("blk.%d" , i )].size ()
107
+ if blk , ok := layers [fmt .Sprintf ("blk.%d" , i )]; ok {
108
+ memoryLayer := blk .size ()
106
109
107
- // KV is proportional to the number of layers
108
- memoryLayer += kv / ggml .KV ().BlockCount ()
110
+ // KV is proportional to the number of layers
111
+ memoryLayer += kv / ggml .KV ().BlockCount ()
109
112
110
- memoryRequiredTotal += memoryLayer
111
- if (opts .NumGPU >= 0 && layerCount + 1 <= opts .NumGPU ) || (opts .NumGPU < 0 && memoryAvailable > memoryRequiredPartial + memoryLayer ) {
112
- memoryRequiredPartial += memoryLayer
113
- layerCount ++
113
+ memoryRequiredTotal += memoryLayer
114
+ if (opts .NumGPU >= 0 && layerCount + 1 <= opts .NumGPU ) || (opts .NumGPU < 0 && memoryAvailable > memoryRequiredPartial + memoryLayer ) {
115
+ memoryRequiredPartial += memoryLayer
116
+ layerCount ++
117
+ }
114
118
}
115
119
}
116
120
0 commit comments