Skip to content

Commit 7a02b20

Browse files
committed
chore: Refactor runner selection logic in sched.go
1 parent e6cd8cb commit 7a02b20

File tree

1 file changed

+10
-4
lines changed

1 file changed

+10
-4
lines changed

server/sched.go

+10-4
Original file line numberDiff line numberDiff line change
@@ -122,10 +122,11 @@ func (s *Scheduler) processPending(ctx context.Context) {
122122
s.loadedMu.Unlock()
123123
var runner *runnerRef = nil
124124
if len(runners) > 0 {
125+
var minRef = runners[0].refCount
125126
for _, r := range runners {
126-
if !r.isAtCapacity() {
127+
if runner.refCount <= minRef{
127128
runner = r
128-
break
129+
minRef = r.refCount
129130
}
130131
}
131132
}
@@ -315,7 +316,7 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
315316
// Complete the pending request and send the runner back to the requester
316317
// Wires up a finished event after the request context is completed
317318
// Updates session duration, and resets expiration timer
318-
func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *LlmRequest) {
319+
func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *runnerRef) {
319320
runner.refMu.Lock()
320321
defer runner.refMu.Unlock()
321322
runner.refCount++
@@ -387,7 +388,7 @@ func (s *Scheduler) load(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList)
387388
go func() {
388389
<-req.ctx.Done()
389390
slog.Debug("context for request finished")
390-
s.finishedReqCh <- req
391+
s.finishedReqCh <- runner
391392
}()
392393
req.successCh <- runner
393394
}()
@@ -465,6 +466,11 @@ type runnerRef struct {
465466
*api.Options
466467
}
467468

469+
func (r *runnerRef) isAtCapacity() bool {
470+
// Implement your capacity check logic here
471+
// Return true if the runner is at capacity, false otherwise
472+
}
473+
468474
// The refMu must already be held when calling unload
469475
func (runner *runnerRef) unload() {
470476
if runner.expireTimer != nil {

0 commit comments

Comments
 (0)