Skip to content

Commit 060649b

Browse files
authored
Refactor task scheduler and executor (milvus-io#20828)
Make the performance able to scale out.

Signed-off-by: yah01 <[email protected]>
Signed-off-by: yah01 <[email protected]>
1 parent 18762f8 commit 060649b

File tree

9 files changed: +283 -92 lines changed

codecov.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ ignore:
3838
- "**/*.pb.go"
3939
- "**/*.proto"
4040
- "internal/metastore/db/dbmodel/mocks/.*"
41-
- "internal/mocks"
41+
- "**/mock_*.go"
4242

4343

4444

configs/milvus.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ queryCoord:
184184
loadTimeoutSeconds: 600
185185
checkHandoffInterval: 5000
186186
taskMergeCap: 16
187+
taskExecutionCap: 256
187188
enableActiveStandby: false # Enable active-standby
188189

189190
# Related configuration of queryNode, used to run hybrid search between vector and scalar data.

internal/querycoordv2/server.go

+3
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ func (s *Server) Start() error {
311311
}
312312
for _, node := range sessions {
313313
s.nodeMgr.Add(session.NewNodeInfo(node.ServerID, node.Address))
314+
s.taskScheduler.AddExecutor(node.ServerID)
314315
}
315316
s.checkReplicas()
316317
for _, node := range sessions {
@@ -571,6 +572,7 @@ func (s *Server) watchNodes(revision int64) {
571572

572573
func (s *Server) handleNodeUp(node int64) {
573574
log := log.With(zap.Int64("nodeID", node))
575+
s.taskScheduler.AddExecutor(node)
574576
s.distController.StartDistInstance(s.ctx, node)
575577

576578
for _, collection := range s.meta.CollectionManager.GetAll() {
@@ -598,6 +600,7 @@ func (s *Server) handleNodeUp(node int64) {
598600

599601
func (s *Server) handleNodeDown(node int64) {
600602
log := log.With(zap.Int64("nodeID", node))
603+
s.taskScheduler.RemoveExecutor(node)
601604
s.distController.Remove(node)
602605

603606
// Refresh the targets, to avoid consuming messages too early from channel

internal/querycoordv2/task/executor.go

+15-7
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,11 @@ import (
2626
"github.com/milvus-io/milvus/internal/log"
2727
"github.com/milvus-io/milvus/internal/proto/querypb"
2828
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
29+
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
2930
"github.com/milvus-io/milvus/internal/querycoordv2/session"
3031
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
3132
"github.com/milvus-io/milvus/internal/util/tsoutil"
33+
"go.uber.org/atomic"
3234
"go.uber.org/zap"
3335
)
3436

@@ -45,7 +47,8 @@ type Executor struct {
4547
// Merge load segment requests
4648
merger *Merger[segmentIndex, *querypb.LoadSegmentsRequest]
4749

48-
executingTasks sync.Map
50+
executingTasks sync.Map
51+
executingTaskNum atomic.Int32
4952
}
5053

5154
func NewExecutor(meta *meta.Meta,
@@ -82,10 +85,14 @@ func (ex *Executor) Stop() {
8285
// does nothing and returns false if the action is already committed,
8386
// returns true otherwise.
8487
func (ex *Executor) Execute(task Task, step int) bool {
88+
if ex.executingTaskNum.Load() > Params.QueryCoordCfg.TaskExecutionCap {
89+
return false
90+
}
8591
_, exist := ex.executingTasks.LoadOrStore(task.ID(), struct{}{})
8692
if exist {
8793
return false
8894
}
95+
ex.executingTaskNum.Inc()
8996

9097
log := log.With(
9198
zap.Int64("taskID", task.ID()),
@@ -137,7 +144,7 @@ func (ex *Executor) processMergeTask(mergeTask *LoadSegmentsTask) {
137144
defer func() {
138145
for i := range mergeTask.tasks {
139146
mergeTask.tasks[i].SetErr(task.Err())
140-
ex.removeAction(mergeTask.tasks[i], mergeTask.steps[i])
147+
ex.removeTask(mergeTask.tasks[i], mergeTask.steps[i])
141148
}
142149
}()
143150

@@ -180,7 +187,7 @@ func (ex *Executor) processMergeTask(mergeTask *LoadSegmentsTask) {
180187
log.Info("load segments done", zap.Int64("taskID", task.ID()), zap.Duration("timeTaken", elapsed))
181188
}
182189

183-
func (ex *Executor) removeAction(task Task, step int) {
190+
func (ex *Executor) removeTask(task Task, step int) {
184191
if task.Err() != nil {
185192
log.Info("excute action done, remove it",
186193
zap.Int64("taskID", task.ID()),
@@ -189,6 +196,7 @@ func (ex *Executor) removeAction(task Task, step int) {
189196
}
190197

191198
ex.executingTasks.Delete(task.ID())
199+
ex.executingTaskNum.Dec()
192200
}
193201

194202
func (ex *Executor) executeSegmentAction(task *SegmentTask, step int) {
@@ -218,7 +226,7 @@ func (ex *Executor) loadSegment(task *SegmentTask, step int) error {
218226
if err != nil {
219227
task.SetErr(err)
220228
task.Cancel()
221-
ex.removeAction(task, step)
229+
ex.removeTask(task, step)
222230
}
223231
}()
224232

@@ -270,7 +278,7 @@ func (ex *Executor) loadSegment(task *SegmentTask, step int) error {
270278
}
271279

272280
func (ex *Executor) releaseSegment(task *SegmentTask, step int) {
273-
defer ex.removeAction(task, step)
281+
defer ex.removeTask(task, step)
274282
startTs := time.Now()
275283
action := task.Actions()[step].(*SegmentAction)
276284
defer action.isReleaseCommitted.Store(true)
@@ -343,7 +351,7 @@ func (ex *Executor) executeDmChannelAction(task *ChannelTask, step int) {
343351
}
344352

345353
func (ex *Executor) subDmChannel(task *ChannelTask, step int) error {
346-
defer ex.removeAction(task, step)
354+
defer ex.removeTask(task, step)
347355
startTs := time.Now()
348356
action := task.Actions()[step].(*ChannelAction)
349357
log := log.With(
@@ -415,7 +423,7 @@ func (ex *Executor) subDmChannel(task *ChannelTask, step int) error {
415423
}
416424

417425
func (ex *Executor) unsubDmChannel(task *ChannelTask, step int) error {
418-
defer ex.removeAction(task, step)
426+
defer ex.removeTask(task, step)
419427
startTs := time.Now()
420428
action := task.Actions()[step].(*ChannelAction)
421429
log := log.With(

internal/querycoordv2/task/mock_scheduler.go

+57-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)