Skip to content

Commit ec231a7

Browse files
committed
Remove VRAM convergence check for Windows
The APIs we query are optimistic about free space, and Windows pages VRAM, so we don't have to wait for reported usage to recover on unload.
1 parent 7ca71a6 commit ec231a7

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

server/sched.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"fmt"
77
"log/slog"
88
"reflect"
9+
"runtime"
910
"sort"
1011
"strings"
1112
"sync"
@@ -487,8 +488,8 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
487488
func (runner *runnerRef) waitForVRAMRecovery() chan interface{} {
488489
finished := make(chan interface{}, 1)
489490

490-
// CPU or Metal don't need checking, so no waiting required
491-
if len(runner.gpus) == 1 && (runner.gpus[0].Library == "cpu" || runner.gpus[0].Library == "metal") {
491+
// CPU or Metal don't need checking, so no waiting is required. Windows can page VRAM, and the APIs we query tend to be optimistic on free space, so we skip the wait there too.
492+
if (len(runner.gpus) == 1 && (runner.gpus[0].Library == "cpu" || runner.gpus[0].Library == "metal")) || runtime.GOOS == "windows" {
492493
finished <- struct{}{}
493494
return finished
494495
}

0 commit comments

Comments
 (0)