Skip to content

Commit 6a1b471

Browse files
authored
Merge pull request ollama#4430 from dhiltgen/gpu_info
Remove VRAM convergence check for windows
2 parents 7ca71a6 + ec231a7 commit 6a1b471

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

server/sched.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"fmt"
77
"log/slog"
88
"reflect"
9+
"runtime"
910
"sort"
1011
"strings"
1112
"sync"
@@ -487,8 +488,8 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
487488
func (runner *runnerRef) waitForVRAMRecovery() chan interface{} {
488489
finished := make(chan interface{}, 1)
489490

490-
// CPU or Metal don't need checking, so no waiting required
491-
if len(runner.gpus) == 1 && (runner.gpus[0].Library == "cpu" || runner.gpus[0].Library == "metal") {
491+
// CPU and Metal don't need checking, so no waiting is required. Windows can page VRAM, and the APIs we query tend to be optimistic about free space, so we skip the wait there as well.
492+
if (len(runner.gpus) == 1 && (runner.gpus[0].Library == "cpu" || runner.gpus[0].Library == "metal")) || runtime.GOOS == "windows" {
492493
finished <- struct{}{}
493494
return finished
494495
}

0 commit comments

Comments
 (0)